diff --git a/lib_com/cnst.h b/lib_com/cnst.h index f16284aafab0658585f5bb19d3088649e5807d17..0b9544f2f8cea8f4294a8a3eafd9f387e38056bb 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -752,6 +752,7 @@ enum #define GAMMA_EV 0.92f /* weighting factor for core synthesis error weighting */ #define FORMANT_SHARPENING_NOISE_THRESHOLD 21.0f /* lp_noise level above which formant sharpening is deactivated */ #define E_MIN_FX 1 /* QSCALE (Q7)*/ +#define E_MIN_IVAS_FX 1835 /* (Q19) (E12) */ #define STEP_DELTA_FX 11 #define FORMANT_SHARPENING_NOISE_THRESHOLD_FX 5376 /* 21 (!8)lp_noise level above which formant sharpening is deactivated - at this level most of 20 dB SNR office noisy speech still uses sharpening */ diff --git a/lib_com/fd_cng_com_fx.c b/lib_com/fd_cng_com_fx.c index 825d13cbbc441fc4b24ba501b7b87a8843acf750..8f55280149576e393514336613c8ecab6cf37edc 100644 --- a/lib_com/fd_cng_com_fx.c +++ b/lib_com/fd_cng_com_fx.c @@ -10,6 +10,7 @@ #include "rom_basop_util.h" #include "rom_com.h" #include "prot_fx.h" +#include "prot_fx_enc.h" #include "ivas_prot_fx.h" #define DELTA_SHIFT 2 @@ -2471,6 +2472,147 @@ void AnalysisSTFT_fx( } #endif // IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED +/*------------------------------------------------------------------- + * SynthesisSTFT_enc_ivas_fx() + * + * STFT synthesis filterbank + *-------------------------------------------------------------------*/ + +void SynthesisSTFT_enc_ivas_fx( + Word32 *fftBuffer, /* i : pointer to FFT bins */ + Word16 fftBufferExp, /* i : exponent of FFT bins */ + Word16 *timeDomainOutput, /* o : pointer to time domain signal */ + Word16 *olapBuffer, /* i/o : pointer to overlap buffer */ + const PWord16 *olapWin, /* i : pointer to overlap window */ + Word16 tcx_transition, + HANDLE_FD_CNG_COM hFdCngCom, /* i/o : pointer to FD_CNG structure containing all buffers and variables */ + Word16 gen_exc, + Word16 *Q_new, /* i : Q of generated exc_cng */ + const Word16 element_mode, /* i : element mode */ + const 
Word16 nchan_out /* i : number of output channels */ +) +{ + Word16 i, len, scale, tmp; + Word16 len2, len3, len4; + Word16 buf[M + 1 + L_FRAME16k]; + + + /* Perform IFFT */ + scale = 0; + move16(); + BASOP_rfft( fftBuffer, hFdCngCom->fftlen, &scale, 1 ); + fftBufferExp = add( fftBufferExp, scale ); + hFdCngCom->fftBuffer_exp = fftBufferExp; + move16(); + + fftBufferExp = add( fftBufferExp, hFdCngCom->fftlenShift ); + + /* Perform overlap-add */ + /* Handle overlap in P/S domain for stereo */ + test(); + test(); + IF( ( EQ_16( element_mode, IVAS_CPE_TD ) || EQ_16( element_mode, IVAS_CPE_DFT ) ) && EQ_16( nchan_out, 2 ) ) + { + Copy( olapBuffer + 3 * hFdCngCom->frameSize / 4 - ( M + 1 ), buf, hFdCngCom->frameSize + M + 1 ); + set16_fx( olapBuffer, 0, hFdCngCom->fftlen ); + } + ELSE + { + Copy( olapBuffer + hFdCngCom->frameSize, olapBuffer, hFdCngCom->frameSize ); + set16_fx( olapBuffer + hFdCngCom->frameSize, 0, hFdCngCom->frameSize ); + } + len2 = shr( hFdCngCom->fftlen, 2 ); + len4 = shr( hFdCngCom->fftlen, 3 ); + len3 = add( len2, len4 ); + len = add( hFdCngCom->frameSize, len4 ); + IF( tcx_transition ) + { + FOR( i = 0; i < len; i++ ) + { + olapBuffer[i] = round_fx_sat( L_shl_sat( fftBuffer[i], sub( fftBufferExp, 15 ) ) ); + move16(); + } + } + ELSE + { + FOR( i = 0; i < len4; i++ ) + { + olapBuffer[i + 1 * len4] = add_sat( olapBuffer[i + 1 * len4], mult_r( round_fx_sat( L_shl_sat( fftBuffer[i + 1 * len4], sub( fftBufferExp, 15 ) ) ), olapWin[i].v.im ) ); + move16(); + olapBuffer[i + 2 * len4] = add_sat( olapBuffer[i + 2 * len4], mult_r( round_fx_sat( L_shl_sat( fftBuffer[i + 2 * len4], sub( fftBufferExp, 15 ) ) ), olapWin[len4 - 1 - i].v.re ) ); + move16(); + } + FOR( i = len3; i < len; i++ ) + { + olapBuffer[i] = round_fx_sat( L_shl_sat( fftBuffer[i], sub( fftBufferExp, 15 ) ) ); + move16(); + } + } + + FOR( i = 0; i < len4; i++ ) + { + olapBuffer[i + 5 * len4] = mult_r( round_fx_sat( L_shl_sat( fftBuffer[i + 5 * len4], sub( fftBufferExp, 15 ) ) ), 
olapWin[i].v.re ); + move16(); + olapBuffer[i + 6 * len4] = mult_r( round_fx_sat( L_shl_sat( fftBuffer[i + 6 * len4], sub( fftBufferExp, 15 ) ) ), olapWin[len4 - 1 - i].v.im ); + move16(); + } + + len = add( len, len2 ); + FOR( i = len; i < hFdCngCom->fftlen; i++ ) + { + olapBuffer[i] = 0; + move16(); + } + + /* Get time-domain signal */ + FOR( i = 0; i < hFdCngCom->frameSize; i++ ) + { + timeDomainOutput[i] = mult_r( olapBuffer[i + len4], hFdCngCom->fftlenFac ); + move16(); + } + /* Generate excitation */ + test(); + test(); + IF( ( EQ_16( element_mode, IVAS_CPE_TD ) || EQ_16( element_mode, IVAS_CPE_DFT ) ) && EQ_16( nchan_out, 2 ) ) + { + FOR( i = 0; i < hFdCngCom->frameSize / 2; i++ ) + { + buf[i + ( M + 1 )] = add( buf[i + ( M + 1 )], olapBuffer[i + hFdCngCom->frameSize / 4] ); + move16(); + } + + FOR( i = 0; i < M + 1 + hFdCngCom->frameSize; i++ ) + { + buf[i] = mult_r( buf[i], hFdCngCom->fftlenFac ); + move16(); + } + } + ELSE + { + FOR( i = 0; i < M + 1 + hFdCngCom->frameSize; i++ ) + { + buf[i] = mult_r( olapBuffer[i + len4 - M - 1], hFdCngCom->fftlenFac ); + move16(); + } + tmp = buf[0]; + move16(); + } + IF( EQ_16( gen_exc, 1 ) ) + { + + E_UTIL_f_preemph2( sub( *Q_new, 1 ), buf + 1, PREEMPH_FAC, M + hFdCngCom->frameSize, &tmp ); + Residu3_fx( hFdCngCom->A_cng, buf + 1 + M, hFdCngCom->exc_cng, hFdCngCom->frameSize, 1 ); + } + IF( EQ_16( gen_exc, 2 ) ) + { + *Q_new = E_UTIL_f_preemph3( buf + 1, PREEMPH_FAC, M + hFdCngCom->frameSize, &tmp, 1 ); + move16(); + Residu3_fx( hFdCngCom->A_cng, buf + 1 + M, hFdCngCom->exc_cng, hFdCngCom->frameSize, 1 ); + } +} +#endif + /*------------------------------------------------------------------- * SynthesisSTFT() * diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index a7883abe53b6d98de2381b6e8239c1af720ebf04..d86aa894eda9a6187b351dc1a8f9b74fe16900fc 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -2644,11 +2644,11 @@ void unclr_classifier_dft( CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ ); 
#endif - +#ifndef IVAS_FLOAT_FIXED void unclr_classifier_td( CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ ); - +#endif #ifdef IVAS_FLOAT_FIXED void xtalk_classifier_dft_fx( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ @@ -2661,11 +2661,11 @@ void xtalk_classifier_dft( const int16_t itd, /* i : ITD from DFT stereo - used as a feature */ const float gcc_phat[] /* i : GPHAT cross-channel correlation function */ ); - +#ifndef IVAS_FLOAT_FIXED void xtalk_classifier_td( CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ ); - +#endif /*----------------------------------------------------------------------------------* * TD Stereo prototypes @@ -2691,7 +2691,7 @@ void stereo_tdm_prep_dwnmx ( const float *input1, /* i : right channel input */ const int16_t input_frame /* i : frame lenght */ ); - +#ifndef IVAS_FLOAT_FIXED int16_t stereo_tdm_ener_analysis( const int16_t ivas_format, /* i : IVAS format */ CPE_ENC_HANDLE hCPE, /* i : CPE structure */ @@ -2699,7 +2699,7 @@ int16_t stereo_tdm_ener_analysis( int16_t *tdm_SM_or_LRTD_Pri, /* o : channel combination scheme flag in TD stereo OR LRTD primary channel */ int16_t *tdm_ratio_idx_SM /* o : TDM ratio index for SM mode */ ); - +#endif void stereo_tdm_downmix( STEREO_TD_ENC_DATA_HANDLE hStereoTD, /* i : TD stereo IVAS encoder structure */ float *Left_in, /* i/o: Left channel -> Primary channel */ @@ -3658,23 +3658,23 @@ ivas_error front_vad( ); #ifdef IVAS_FLOAT_FIXED ivas_error front_vad_fx( - CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure, nullable */ - Encoder_State *st, /* i/o: encoder state structure */ - const ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : configuration structure */ - FRONT_VAD_ENC_HANDLE *hFrontVads, /* i/o: front-VAD handles */ - const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ - const int16_t input_frame, /* i : frame length */ - int16_t vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO */ - float fr_bands[][2 * NB_BANDS], /* i : energy in frequency 
bands */ - float Etot_LR[], /* o : total energy Left & Right channel */ - float lf_E[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */ - int16_t localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels */ - int16_t vad_hover_flag[], /* o : VAD hangover flag */ - float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN */ - float *PS_out, /* o : energy spectrum */ - float *Bin_E_out, /* o : log-energy spectrum of the current frame*/ - Word16 Qinp -); + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure, nullable */ + Encoder_State *st, /* i/o: encoder state structure */ + const ENCODER_CONFIG_HANDLE hEncoderConfig, /* i : configuration structure */ + FRONT_VAD_ENC_HANDLE *hFrontVads, /* i/o: FrontVad handles */ + const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */ + const int16_t input_frame, /* i : frame length */ + int16_t vad_flag_dtx[], /* o : HE-SAD flag with additional DTX HO */ + float fr_bands[][2 * NB_BANDS], /* i : energy in frequency bands */ + float Etot_LR[], /* o : total energy Left & Right channel */ + float lf_E[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */ + int16_t localVAD_HE_SAD[], /* o : HE-SAD flag without hangover, LR channels */ + int16_t vad_hover_flag[], /* o : VAD hangover flag */ + float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN*/ + float *PS_out, /* o : energy spectrum */ + float *Bin_E_out, /* o : log-energy spectrum of the current frame */ + Word16 Q_inp , + Word16 Q_add ); #endif ivas_error front_vad_spar( SPAR_ENC_HANDLE hSpar, /* i/o: SPAR encoder structure */ diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index f71479f81ce888fa1302929b38eccc68f0d3b29a..a87cca412105a7831fdf90bd802f28c049548898 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -2045,6 +2045,16 @@ void masa_compensate_two_dir_energy_ratio_index_fx( const Word16 
hodirac_flag /* i : flag to indicate HO-DirAC mode */ ); +#ifdef IVAS_FLOAT_FIXED +int16_t stereo_tdm_ener_analysis_fx( + const Word16 ivas_format, /* i : IVAS format */ + CPE_ENC_HANDLE hCPE, /* i : CPE structure */ + const Word16 input_frame, /* i : Number of samples */ + Word16 *tdm_SM_or_LRTD_Pri, /* o : channel combination scheme flag in TD stereo OR LRTD primary channel */ + Word16 *tdm_ratio_idx_SM /* o : TDM ratio index for SM mode */ +); +#endif + #ifdef IVAS_FLOAT_FIXED /*! r: projected azimuth index */ Word16 ivas_dirac_project_azimuth_index( @@ -3043,4 +3053,14 @@ void tdm_ol_pitch_comparison_fx( Word16 pitch_fr[CPE_CHANNELS][NB_SUBFR], /* i/o: fractional pitch values, Q6 */ Word16 voicing_fr[CPE_CHANNELS][NB_SUBFR] /* i/o: fractional pitch gains, Q15 */ ); + +#ifdef IVAS_FLOAT_FIXED +void xtalk_classifier_td_fx( + CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ +); + +void unclr_classifier_td_fx( + CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ +); +#endif #endif diff --git a/lib_com/prot.h b/lib_com/prot.h index 48761b078b9c1c4cd34b34a51c5125fe356e6abf..61568e3b25a39dc482ca2a85a0c7080b8f28968b 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -5985,10 +5985,13 @@ void core_switching_pre_enc( ); void core_switching_post_enc( - Encoder_State *st, /* i/o: encoder state structure */ - const float *old_inp_12k8, /* i : old input signal @12.8kHz */ - const float *old_inp_16k, /* i : old input signal @16kHz */ - const float A[] /* i : unquant LP filter coefs. */ + Encoder_State *st, /* i/o: encoder state structure */ + // const float *old_inp_12k8, /* i : old input signal @12.8kHz */ + float *old_inp_12k8, /* i : old input signal @12.8kHz */ + // const float *old_inp_16k, /* i : old input signal @16kHz */ + float *old_inp_16k, /* i : old input signal @16kHz */ + // const float A[] /* i : unquant. LP filter coefs. */ + float A[] /* i : unquant. LP filter coefs. 
*/ ); ivas_error core_switching_post_dec( diff --git a/lib_enc/acelp_core_enc.c b/lib_enc/acelp_core_enc.c index ede6d9591b3b1d64c03bc88766938997df9a8915..f25becbc9980b374ad5707c31c12735e162159a2 100644 --- a/lib_enc/acelp_core_enc.c +++ b/lib_enc/acelp_core_enc.c @@ -606,7 +606,25 @@ ivas_error acelp_core_enc( st->hDtxEnc->last_CNG_L_frame = st->L_frame; } - generate_comfort_noise_enc( st ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 Q_cngNoise = Q_factor_arrL( st->hFdCngEnc->hFdCngCom->cngNoiseLevel_flt, FFTCLDFBLEN ); + floatToFixed_arrL( st->hFdCngEnc->hFdCngCom->cngNoiseLevel_flt, st->hFdCngEnc->hFdCngCom->cngNoiseLevel, Q_cngNoise, FFTCLDFBLEN ); + st->hFdCngEnc->hFdCngCom->cngNoiseLevelExp = sub( 31, Q_cngNoise ); + IF( st->hTdCngEnc != NULL ) + { + st->hTdCngEnc->CNG_att_fx = float_to_fix16( st->hTdCngEnc->CNG_att, Q7 ); + } + st->preemph_fac = float_to_fix16( st->preemph_fac_flt, Q15 ); +#endif + // generate_comfort_noise_enc( st ); + generate_comfort_noise_enc_ivas_fx( st, Q_new, 1 ); + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + fixedToFloat_arr( st->hFdCngEnc->hFdCngCom->exc_cng, st->hFdCngEnc->hFdCngCom->exc_cng_flt, Q_new, st->L_frame ); + fixedToFloat_arr( st->hFdCngEnc->hFdCngCom->olapBufferSynth, st->hFdCngEnc->hFdCngCom->olapBufferSynth_flt, st->hFdCngEnc->hFdCngCom->fftlenShift, FFTLEN ); + fixedToFloat_arr( st->hFdCngEnc->hFdCngCom->timeDomainBuffer, st->hFdCngEnc->hFdCngCom->timeDomainBuffer_flt, Q_new, st->hFdCngEnc->hFdCngCom->frameSize ); + fixedToFloat_arr( st->hTcxEnc->Txnq, st->hTcxEnc->Txnq_flt, Q_new, ( L_FRAME32k / 2 + 64 ) ); +#endif #ifdef IVAS_FLOAT_FIXED_CONVERSIONS floatToFixed_arr( st->hFdCngEnc->hFdCngCom->A_cng_flt, st->hFdCngEnc->hFdCngCom->A_cng, Q12, ( M + 1 ) ); @@ -1309,7 +1327,7 @@ ivas_error acelp_core_enc( Word16 q_syn = Q_factor_arr( syn, L_FRAME16k ); q_syn = min( q_syn, Q_factor_arr( res, L_FRAME16k ) ); floatToFixed_arr16( syn, syn_fx, q_syn, L_FRAME16k ); - floatToFixed_arr( pitch_buf, pitch_buf_fx, Q6, NB_SUBFR16k 
); + floatToFixed_arr( pitch_buf, pitch_buf_fx, Q6, NB_SUBFR16k ); // Saturation Conversion used as last values have garbage values even in float floatToFixed_arr16( res, res_fx, q_syn, L_FRAME16k ); #endif FEC_encode_ivas_fx( hBstr, st->acelp_cfg, syn_fx, st->coder_type, st->clas, pitch_buf_fx, res_fx, &st->Last_pulse_pos, st->L_frame, st->total_brate, q_syn ); @@ -1367,7 +1385,7 @@ ivas_error acelp_core_enc( #ifdef IVAS_FLOAT_FIXED_CONVERSIONS // Word16 voice_factors_fx[NB_SUBFR16k]; - floatToFixed_arr( voice_factors, voice_factors_fx, Q15, 5 ); + floatToFixed_arr( voice_factors, voice_factors_fx, Q15, 5 ); // Saturation Conversion used as last values have garbage values even in float st->hBWE_TD->bwe_non_lin_prev_scale_fx = floatToFixed( st->hBWE_TD->bwe_non_lin_prev_scale, Q30 ); @@ -1409,7 +1427,7 @@ ivas_error acelp_core_enc( #ifdef IVAS_FLOAT_FIXED_CONVERSIONS // Word16 pitch_buf_fx[NB_SUBFR16k]; - floatToFixed_arr( pitch_buf, pitch_buf_fx, Q6, NB_SUBFR16k ); + floatToFixed_arr( pitch_buf, pitch_buf_fx, Q6, NB_SUBFR16k ); // Saturation Conversion used as last values have garbage values even in float floatToFixed_arr16( st->old_pitch_buf, st->old_pitch_buf_fx, Q6, 2 * NB_SUBFR16k ); Es_pred_fx = float_to_fix16( Es_pred, Q8 ); diff --git a/lib_enc/acelp_core_switch_enc_fx.c b/lib_enc/acelp_core_switch_enc_fx.c index 9ab60f5d76c32855bc03b007e9cc6f56e55d054e..029e3083ea893830d663c161a859cc83aa2943f9 100644 --- a/lib_enc/acelp_core_switch_enc_fx.c +++ b/lib_enc/acelp_core_switch_enc_fx.c @@ -11,7 +11,7 @@ #include "prot_fx.h" /* Function prototypes */ #include "prot_fx_enc.h" /* Function prototypes */ #include "basop_util.h" /* Function prototypes */ - +#include "prot.h" /*---------------------------------------------------------------------* * Local function prototypes *---------------------------------------------------------------------*/ @@ -26,8 +26,19 @@ static void encod_gen_voic_core_switch_fx( Encoder_State *st_fx, const Word32 core_bitrate_fx, Word16 
shift, Word16 Q_new ); +static void encod_gen_voic_core_switch_ivas_fx( Encoder_State *st_fx, + const Word16 L_frame_fx, + const Word16 inp_fx[], + const Word16 Aq_fx[], + const Word16 A_fx[], + const Word16 T_op[], + Word16 *exc_fx, + const Word32 core_bitrate_fx, + Word16 shift, + Word16 Q_new ); static void bwe_switch_enc_fx( Encoder_State *st_fx, const Word16 *new_speech ); +static void bwe_switch_enc_ivas_fx( Encoder_State *st_fx, const Word16 *new_speech ); static Word16 dotprod_satcont( const Word16 *x, const Word16 *y, Word16 qx, Word16 qy, Word16 *qo, Word16 len, Word16 delta ); @@ -181,6 +192,118 @@ void acelp_core_switch_enc_fx( return; } + +void acelp_core_switch_enc_ivas_fx( + Encoder_State *st_fx, /* i/o: encoder state structure */ + const Word16 inp12k8[], /* i : input signal @12.8 kHz Q0 */ + const Word16 inp16k[], /* i : input signal @16 kHz Q0 */ + const Word16 A[NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes Q12*/ + Word16 shift, + Word16 Q_new ) +{ + Word16 i, j, T_op[2]; + Word16 old_exc[L_EXC], *exc; /* excitation signal buffer Qexc */ + const Word16 *inp; + Word32 cbrate; + Word16 Aq[2 * ( M + 1 )]; + LPD_state_HANDLE hLPDmem; /* ACELP LPDmem memories */ + BSTR_ENC_HANDLE hBstr = st_fx->hBstr; + Word16 nb_bits; + UWord16 value; + + hLPDmem = st_fx->hLPDmem; + + /* initializations */ + exc = &old_exc[L_EXC_MEM]; + move16(); /* pointer to excitation signal in the current frame */ + Copy( hLPDmem->old_exc, old_exc, L_EXC_MEM ); /*now old_exc has the same scaling as st_fx->old_exc; need to change later? 
*/ + + Copy( st_fx->old_Aq_12_8_fx, Aq, M + 1 ); + Copy( st_fx->old_Aq_12_8_fx, Aq + ( M + 1 ), M + 1 ); + T_op[0] = st_fx->pitch[0]; + move16(); + T_op[1] = st_fx->pitch[1]; + move16(); + + /*----------------------------------------------------------------* + * set switching frame bitrate + *----------------------------------------------------------------*/ + + IF( EQ_16( st_fx->last_L_frame, L_FRAME ) ) /* ACELP@12k8 core */ + { + inp = inp12k8; + + IF( GT_32( st_fx->core_brate, ACELP_24k40 ) ) + { + cbrate = L_add( ACELP_24k40, 0 ); + } + ELSE + { + cbrate = L_add( st_fx->core_brate, 0 ); + } + } + ELSE /* ACELP@16k core */ + { + inp = inp16k; + + IF( LE_32( st_fx->core_brate, ACELP_8k00 ) ) + { + cbrate = L_add( ACELP_8k00, 0 ); + } + ELSE IF( LE_32( st_fx->core_brate, ACELP_14k80 ) ) + { + cbrate = L_add( ACELP_14k80, 0 ); + } + ELSE + { + cbrate = L_min( st_fx->core_brate, ACELP_22k60 ); + } + } + + IF( NE_16( st_fx->last_L_frame, L_FRAME ) ) + { + T_op[0] = shr( add( round_fx( L_shl( L_mult( 20480, T_op[0] ), 2 ) ), 1 ), 1 ); + move16(); + T_op[1] = shr( add( round_fx( L_shl( L_mult( 20480, T_op[1] ), 2 ) ), 1 ), 1 ); + move16(); + } + + /*----------------------------------------------------------------* + * Excitation encoding + *----------------------------------------------------------------*/ + config_acelp1_IVAS( ENC, st_fx->total_brate, cbrate, st_fx->core, -1, -1, st_fx->last_L_frame, -1, &( st_fx->acelp_cfg ), hBstr->nb_bits_tot, + GENERIC, -1, -1, &j, &i, st_fx->element_mode, &i /*dummy*/, 0 /*tdm_lp_reuse_flag*/, 0 /*tdm_low_rate_mode*/, st_fx->idchan, st_fx->active_fr_cnt_fx, 0 /*tdm_Pitch_reuse_flag*/, 0, 0 /*GSC_IVAS_mode*/ ); + + encod_gen_voic_core_switch_ivas_fx( st_fx, st_fx->last_L_frame, inp, Aq, A, T_op, exc, cbrate, shift, Q_new ); + + /*----------------------------------------------------------------* + * bit-stream: modify the layer of sub frame CELP + *----------------------------------------------------------------*/ + + i = 
find_indice( hBstr, TAG_ACELP_SUBFR_LOOP_START, &value, &nb_bits ); + + while ( hBstr->ind_list[i].id == TAG_ACELP_SUBFR_LOOP_START ) + { + push_indice( hBstr, IND_CORE_SWITCHING_CELP_SUBFRAME, hBstr->ind_list[i].value, hBstr->ind_list[i].nb_bits ); + i++; + } + delete_indice( hBstr, TAG_ACELP_SUBFR_LOOP_START ); + + /*----------------------------------------------------------------* + * BWE encoding + *----------------------------------------------------------------*/ + + test(); + test(); + IF( !( ( EQ_16( st_fx->last_L_frame, L_FRAME16k ) && EQ_16( inner_frame_tbl[st_fx->bwidth], L_FRAME16k ) ) || EQ_16( inner_frame_tbl[st_fx->bwidth], L_FRAME8k ) ) ) + { + bwe_switch_enc_ivas_fx( st_fx, (const Word16 *) st_fx->old_input_signal_fx ); + } + + return; +} + + /*-------------------------------------------------------------------* * encod_gen_voic_core_switch() * @@ -406,6 +529,228 @@ static void encod_gen_voic_core_switch_fx( return; } +static void encod_gen_voic_core_switch_ivas_fx( + Encoder_State *st_fx, /* i/o: state structure */ + const Word16 L_frame, /* i : length of the frame */ + const Word16 inp[], /* i : input signal */ + const Word16 Aq[], /* i : LP coefficients */ + const Word16 A[], /* i : unquantized A(z) filter */ + const Word16 T_op[], /* i : open loop pitch */ + Word16 *exc, /* i/o: current non-enhanced excitation */ + const Word32 core_bitrate, /* i : switching frame bitrate */ + Word16 shift, + Word16 Q_new ) +{ + Word16 res[L_SUBFR]; /* residual signal Qexc */ + Word16 Ap[M + 1]; /* A(z) with spectral expansion Q12 */ + Word16 xn[L_SUBFR]; /* Target vector for pitch search */ + Word16 xn2[L_SUBFR]; /* Target vector for codebook search */ + Word16 cn[L_SUBFR]; /* Target vector in residual domain */ + Word16 h1[L_SUBFR + ( M + 1 )]; /* Impulse response vector */ + + Word16 code[L_SUBFR]; /* Fixed codebook excitation Q9 */ + Word16 y1[L_SUBFR]; /* Filtered adaptive excitation */ + Word16 y2[L_SUBFR]; /* Filtered algebraic excitation */ + Word16 
gain_pit; /* Pitch gain Q15 */ + Word16 voice_fac; /* Voicing factor Q15 */ + Word32 gain_code; /* Gain of code Q16 */ + Word16 gain_inov; /* inovation gain */ + Word16 i, gcode16; /* tmp variables */ + Word16 T0, T0_frac; /* close loop integer pitch and fractional part */ + Word16 T0_min, T0_max; /* pitch variables */ + + Word16 pitch, tmp16; /* floating pitch value */ + Word16 g_corr[6]; /* ACELP correl, values + gain pitch */ + Word16 clip_gain; /* ISF clip gain */ + + Word16 unbits; /* number of unused bits for PI */ + Word32 norm_gain_code; + Word16 pitch_limit_flag; + Word32 L_tmp, Lgcode; + Word16 shift_wsp; + Word16 h2[L_SUBFR + ( M + 1 )]; + Word16 dummyF[NB_SUBFR16k]; + Word16 lp_select, lp_flag; + LPD_state_HANDLE hLPDmem; /* ACELP LPDmem memories */ + BSTR_ENC_HANDLE hBstr; + + hLPDmem = st_fx->hLPDmem; + hBstr = st_fx->hBstr; + + // TD_CNG_ENC_HANDLE hTdCngEnc = st_fx->hTdCngEnc; + + /*------------------------------------------------------------------* + * Initializations + *------------------------------------------------------------------*/ + + shift_wsp = add( Q_new, shift ); + + unbits = 0; + move16(); + + IF( EQ_16( L_frame, L_FRAME ) ) + { + T0_max = PIT_MAX; + move16(); + T0_min = PIT_MIN; + move16(); + } + ELSE /* L_frame == L_FRAME16k */ + { + T0_max = PIT16k_MAX; + move16(); + T0_min = PIT16k_MIN; + move16(); + } + + /*------------------------------------------------------------------* + * Calculation of LP residual (filtering through A[z] filter) + *------------------------------------------------------------------*/ + + tmp16 = st_fx->L_frame; + move16(); + st_fx->L_frame = L_SUBFR; + move16(); + calc_residu_fx( st_fx, inp, res, Aq ); + + // hTdCngEnc->burst_ho_cnt = 0; + st_fx->L_frame = tmp16; + move16(); + + /*------------------------------------------------------------------* + * ACELP subframe loop + *------------------------------------------------------------------*/ + + + Copy( res, exc, L_SUBFR ); + + IF( EQ_16( L_frame, L_FRAME16k 
) ) + { + weight_a_fx( A, Ap, GAMMA16k, M ); /* Bandwidth expansion of A(z) filter coefficients */ + find_targets_ivas_fx( inp, hLPDmem->mem_syn, 0, &( hLPDmem->mem_w0 ), Aq, res, L_SUBFR, Ap, PREEMPH_FAC_16k, xn, cn, h1 ); + } + ELSE + { + weight_a_fx( A, Ap, GAMMA1, M ); /* Bandwidth expansion of A(z) filter coefficients */ + find_targets_ivas_fx( inp, hLPDmem->mem_syn, 0, &( hLPDmem->mem_w0 ), Aq, res, L_SUBFR, Ap, TILT_FAC_FX, xn, cn, h1 ); + } + + /*Scale_sig(h1, L_SUBFR, shift); */ /*Q14-shift */ + Copy_Scale_sig( h1, h2, L_SUBFR, -2 ); + Scale_sig( h1, L_SUBFR, add( 1, shift ) ); /* set h1[] in Q14 with scaling for convolution */ + + /* scaling of xn[] to limit dynamic at 12 bits */ + Scale_sig( xn, L_SUBFR, shift ); + + /*----------------------------------------------------------------* + * Close-loop pitch search and quantization + * Adaptive exc. construction + *----------------------------------------------------------------*/ + set16_fx( dummyF, -1, NB_SUBFR16k ); /* hack to signal ACELP->HQ switching frame */ + pitch = pit_encode_ivas_fx( hBstr, + st_fx->acelp_cfg.pitch_bits, core_bitrate, 0, L_frame, GENERIC, &pitch_limit_flag, 0, exc, L_SUBFR, T_op, &T0_min, &T0_max, &T0, &T0_frac, h1, xn, 0 /*hStereoTD->tdm_Pitch_reuse_flag*/, dummyF /*hStereoTD->tdm_Pri_pitch_buf*/ ); + + /*-----------------------------------------------------------------* + * Find adaptive exitation + *-----------------------------------------------------------------*/ + + pred_lt4( &exc[0], &exc[0], T0, T0_frac, L_SUBFR + 1, pitch_inter4_2, L_INTERPOL2, PIT_UP_SAMP ); + + /*-----------------------------------------------------------------* + * Gain clipping test to avoid unstable synthesis on frame erasure + * or in case of floating point encoder & fixed p. 
decoder + *-----------------------------------------------------------------*/ + + clip_gain = gp_clip_fx( st_fx->element_mode, core_bitrate, st_fx->voicing_fx, 0, GENERIC, xn, st_fx->clip_var_fx, sub( shift_wsp, 1 ) ); + + /*-----------------------------------------------------------------* + * LP filtering of the adaptive excitation, codebook target computation + *-----------------------------------------------------------------*/ + lp_flag = st_fx->acelp_cfg.ltf_mode; + lp_select = lp_filt_exc_enc_ivas_fx( MODE1, GENERIC, 0, exc, h1, xn, y1, xn2, L_SUBFR, L_frame, g_corr, clip_gain, &gain_pit, &lp_flag ); + + IF( EQ_16( lp_flag, NORMAL_OPERATION ) ) + { + push_indice( hBstr, IND_LP_FILT_SELECT, lp_select, 1 ); + } + + /*-----------------------------------------------------------------* + * Innovation encoding + *-----------------------------------------------------------------*/ + + inov_encode_ivas_fx( st_fx, core_bitrate, 0, L_frame, st_fx->last_L_frame, GENERIC, st_fx->bwidth, 0, 0, -1, Aq, gain_pit, cn, exc, + h2, hLPDmem->tilt_code, pitch, xn2, code, y2, &unbits, L_SUBFR, shift, Q_new ); + + /*-----------------------------------------------------------------* + * Gain encoding + *-----------------------------------------------------------------*/ + IF( EQ_16( L_frame, L_FRAME ) ) + { + gain_enc_mless_ivas_fx( hBstr, st_fx->acelp_cfg.gains_mode, st_fx->element_mode, L_frame, 0, -1, xn, y1, shift_wsp, y2, code, st_fx->old_Es_pred_fx, + &gain_pit, &gain_code, &gain_inov, &norm_gain_code, g_corr, clip_gain ); + } + ELSE + { + gain_enc_mless_ivas_fx( hBstr, st_fx->acelp_cfg.gains_mode, st_fx->element_mode, L_frame, 0, -1, xn, y1, shift_wsp, y2, code, st_fx->old_Es_pred_fx, + &gain_pit, &gain_code, &gain_inov, &norm_gain_code, g_corr, clip_gain ); + } + + gp_clip_test_gain_pit_fx( st_fx->element_mode, core_bitrate, gain_pit, st_fx->clip_var_fx ); + + Lgcode = L_shl( gain_code, Q_new ); /* scaled gain_code with Qnew -> Q16*/ + gcode16 = round_fx( Lgcode ); + + // 
hLPDmem->tilt_code = Est_tilt2( exc + 0, gain_pit, code, gain_code, &voice_fac, shift ); + // Q_new or shift ?? ->Qexc + hLPDmem->tilt_code = est_tilt_ivas_fx( exc + 0, gain_pit, code, gain_code, &voice_fac, Q_new, L_SUBFR, 0 ); + move16(); + /*-----------------------------------------------------------------* + * Construct adaptive part of the excitation + *-----------------------------------------------------------------*/ + + FOR( i = 0; i < L_SUBFR; i++ ) + { + /* code in Q9, gain_pit in Q14 */ + L_tmp = L_mult( gcode16, code[i] ); +#ifdef BASOP_NOGLOB + L_tmp = L_shl_sat( L_tmp, 5 ); + L_tmp = L_mac_sat( L_tmp, exc[i], gain_pit ); + L_tmp = L_shl_sat( L_tmp, 1 ); /* saturation can occur here */ + exc[i] = round_fx_sat( L_tmp ); +#else + L_tmp = L_shl( L_tmp, 5 ); + L_tmp = L_mac( L_tmp, exc[i], gain_pit ); + L_tmp = L_shl( L_tmp, 1 ); /* saturation can occur here */ + exc[i] = round_fx( L_tmp ); +#endif + move16(); + } + + /* write reserved bits */ + IF( unbits ) + { + push_indice( hBstr, IND_UNUSED, 0, unbits ); + } + + /*-----------------------------------------------------------------* + * long term prediction on the 2nd sub frame + *-----------------------------------------------------------------*/ + + pred_lt4( &exc[L_SUBFR], &exc[L_SUBFR], T0, T0_frac, L_SUBFR + 1, pitch_inter4_2, L_INTERPOL2, PIT_UP_SAMP ); + + FOR( i = L_SUBFR; i < 2 * L_SUBFR; i++ ) + { +#ifdef BASOP_NOGLOB + exc[i] = round_fx_sat( L_shl_sat( L_mult_sat( exc[i], gain_pit ), 1 ) ); +#else + exc[i] = round_fx( L_shl( L_mult( exc[i], gain_pit ), 1 ) ); +#endif + move16(); + } + + return; +} + /*-------------------------------------------------------------------* * bwe_switch_enc() @@ -559,6 +904,154 @@ static void bwe_switch_enc_fx( return; } +static void bwe_switch_enc_ivas_fx( + Encoder_State *st_fx, /* i/o: encoder state structure */ + const Word16 *new_speech_fx /* i : original input signal Q0 */ +) +{ + + Word16 k, Loverlapp_fx, d1m_fx, maxd1_fx, ind1_fx, gapsize_fx; + Word16 
delta_fx, fdelay_fx; + const Word16 *hp_filter_fx; + const Word16 *fpointers_tab[6] = { hp12800_16000_fx, hp12800_32000_fx, hp12800_48000_fx, hp16000_48000_fx, hp16000_32000_fx, hp16000_48000_fx }; + Word16 tmp, shift; + const Word16 *ptmp; + Word16 tmp_mem_fx[2 * L_FILT48k], tmp_mem2_fx[2 * L_FILT48k], hb_synth_tmp_fx[480]; + Word16 Fs_kHz; + Word16 q_tmp1, q_tmp2, Qmc, Qsq; + Word32 L_tmp1, L_tmp2, L_tmp3, min_sq_cross_fx; + Word16 accA_fx, accB_fx, min_corr_fx, E1_fx, E2_fx, gain_fx; + Word16 synth_subfr_bwe_fx[SWITCH_MAX_GAP]; /* synthesized bwe for core switching */ + Word16 n, L; + BSTR_ENC_HANDLE hBstr; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; + move32(); +#endif + hBstr = st_fx->hBstr; + L = NS2SA_FX2( st_fx->input_Fs, FRAME_SIZE_NS ); + + /* set multiplication factor according to the sampling rate */ + tmp = extract_l( L_shr( st_fx->input_Fs, 14 ) ); + delta_fx = add( tmp, 1 ); + Fs_kHz = shl( delta_fx, 4 ); + tmp = add( tmp, i_mult2( 3, ( sub( st_fx->last_L_frame, L_FRAME ) != 0 ) ) ); + ptmp = fpointers_tab[tmp]; + move16(); + + hp_filter_fx = ptmp; + fdelay_fx = i_mult2( 16, delta_fx ); + IF( EQ_16( st_fx->last_L_frame, L_FRAME ) ) + { + fdelay_fx = i_mult2( 20, delta_fx ); + } + + n = i_mult2( N16_CORE_SW, delta_fx ); + + set16_fx( tmp_mem_fx, 0, 2 * L_FILT48k ); + set16_fx( tmp_mem2_fx, 0, 2 * L_FILT48k ); + + Loverlapp_fx = i_mult2( delta_fx, SWITCH_OVERLAP_8k * 2 ); + gapsize_fx = i_mult2( delta_fx, NS2SA( 16000, SWITCH_GAP_LENGTH_NS ) ); + + shift = sub( add( add( shr( L, 1 ), n ), Loverlapp_fx ), gapsize_fx ); + Copy( new_speech_fx + shift, synth_subfr_bwe_fx, add( gapsize_fx, fdelay_fx ) ); + Copy( new_speech_fx + sub( shift, fdelay_fx ), tmp_mem_fx, fdelay_fx ); + + tmp = add( gapsize_fx, fdelay_fx ); + fir_fx( synth_subfr_bwe_fx, hp_filter_fx, synth_subfr_bwe_fx, tmp_mem_fx, tmp, fdelay_fx, 0, 0 ); + Copy( synth_subfr_bwe_fx + shr( fdelay_fx, 1 ), synth_subfr_bwe_fx, sub( gapsize_fx, shr( fdelay_fx, 1 ) ) ); + + tmp = i_mult2( 
Fs_kHz, 10 ); + fir_fx( new_speech_fx, hp_filter_fx, hb_synth_tmp_fx, tmp_mem2_fx, tmp, fdelay_fx, 1, 0 ); + + min_sq_cross_fx = L_negate( 1 ); + Qsq = 0; + move16(); + min_corr_fx = 0; + move16(); + Qmc = 0; + move16(); + d1m_fx = 0; + move16(); + + maxd1_fx = sub( tmp, add( gapsize_fx, fdelay_fx ) ); + + IF( EQ_16( delta_fx, 2 ) ) + { + maxd1_fx = shr( maxd1_fx, 1 ); + } + ELSE IF( EQ_16( delta_fx, 3 ) ) + { + maxd1_fx = extract_h( L_mult( maxd1_fx, 10923 ) ); + } + + /* find delay */ + ptmp = &hb_synth_tmp_fx[fdelay_fx]; + + FOR( k = 0; k < maxd1_fx; k++ ) + { + accA_fx = dotprod_satcont( ptmp, ptmp, 0, 0, &q_tmp1, gapsize_fx, delta_fx ); + accB_fx = dotprod_satcont( ptmp, synth_subfr_bwe_fx, 0, 0, &q_tmp2, gapsize_fx, delta_fx ); + ptmp += delta_fx; + L_tmp1 = L_mult0( accB_fx, accB_fx ); /*2*q_tmp2; */ + L_tmp2 = Mult_32_16( L_tmp1, min_corr_fx ); /*2*q_tmp2+Qmc-15 */ + L_tmp3 = Mult_32_16( min_sq_cross_fx, accA_fx ); /*Qsq+q_tmp1-15 */ + shift = s_min( add( shl( q_tmp2, 1 ), Qmc ), add( q_tmp1, Qsq ) ); + L_tmp2 = L_shr( L_tmp2, sub( add( shl( q_tmp2, 1 ), Qmc ), shift ) ); + L_tmp3 = L_shr( L_tmp3, sub( add( q_tmp1, Qsq ), shift ) ); + + IF( GE_32( L_tmp2, L_tmp3 ) ) + { + d1m_fx = k; + move16(); + min_corr_fx = accA_fx; + move16(); + Qmc = q_tmp1; + move16(); + min_sq_cross_fx = L_add( L_tmp1, 0 ); + Qsq = shl( q_tmp2, 1 ); + move16(); + } + } + + push_indice( hBstr, IND_CORE_SWITCHING_AUDIO_DELAY, d1m_fx, AUDIODELAYBITS ); + + tmp = add( i_mult2( d1m_fx, delta_fx ), fdelay_fx ); + ptmp = &hb_synth_tmp_fx[tmp]; + move16(); + E1_fx = dotprod_satcont( synth_subfr_bwe_fx, synth_subfr_bwe_fx, 0, 0, &q_tmp1, gapsize_fx, 1 ); + E2_fx = dotprod_satcont( ptmp, ptmp, 0, 0, &q_tmp2, gapsize_fx, 1 ); + + IF( !E1_fx ) + { + E1_fx = shl( 1, 14 ); + q_tmp1 = 14; + move16(); + } + IF( !E2_fx ) + { + E2_fx = shl( 1, 14 ); + q_tmp2 = 14; + move16(); + } + + tmp = div_s( shl( 1, 14 ), E1_fx ); /*Q(29-q_tmp1) */ + L_tmp1 = L_mult( tmp, E2_fx ); /*30-q_tmp1+q_tmp2 */ + q_tmp2 
= sub( q_tmp1, q_tmp2 ); /*30-q_tmp2 */ + L_tmp1 = L_shl( L_tmp1, sub( q_tmp2, 24 ) ); +#ifdef BASOP_NOGLOB + gain_fx = round_fx_o( Isqrt( L_tmp1 ), &Overflow ); /*Q12 */ +#else + gain_fx = round_fx( Isqrt( L_tmp1 ) ); /*Q12 */ +#endif + ind1_fx = usquant_fx( gain_fx, &gain_fx, shr( MINVALUEOFFIRSTGAIN_FX, 1 ), shr( DELTAOFFIRSTGAIN_FX, 4 ), ( 1 << NOOFGAINBITS1 ) ); + push_indice( hBstr, IND_CORE_SWITCHING_AUDIO_GAIN, ind1_fx, NOOFGAINBITS1 ); + + return; +} + + static Word16 dotprod_satcont( const Word16 *x, const Word16 *y, Word16 qx, Word16 qy, Word16 *qo, Word16 len, Word16 delta ) { Word16 tmp_tabx[L_FRAME48k], tmp_taby[L_FRAME48k]; diff --git a/lib_enc/core_switching_enc.c b/lib_enc/core_switching_enc.c index 9b234099b56532173506c49f42913b5dffc154e3..3362ca589a2f65631b84ac95fdc9b67ada00c6ab 100644 --- a/lib_enc/core_switching_enc.c +++ b/lib_enc/core_switching_enc.c @@ -762,10 +762,13 @@ void core_switching_pre_enc_ivas_fx( *---------------------------------------------------------------------*/ void core_switching_post_enc( - Encoder_State *st, /* i/o: encoder state structure */ - const float *old_inp_12k8, /* i : old input signal @12.8kHz */ - const float *old_inp_16k, /* i : old input signal @16kHz */ - const float A[] /* i : unquant. LP filter coefs. */ + Encoder_State *st, /* i/o: encoder state structure */ + // const float *old_inp_12k8, /* i : old input signal @12.8kHz */ + float *old_inp_12k8, /* i : old input signal @12.8kHz */ + // const float *old_inp_16k, /* i : old input signal @16kHz */ + float *old_inp_16k, /* i : old input signal @16kHz */ + // const float A[] /* i : unquant. LP filter coefs. */ + float A[] /* i : unquant. LP filter coefs. 
*/ ) { if ( st->core == HQ_CORE ) @@ -774,9 +777,53 @@ void core_switching_post_enc( if ( ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE ) && st->element_mode == EVS_MONO ) /* core switching ==> ACELP subframe encoding */ { +#ifndef IVAS_FLOAT_FIXED acelp_core_switch_enc( st, old_inp_12k8 + L_INP_MEM - NS2SA( INT_FS_12k8, ACELP_LOOK_NS ), old_inp_16k + L_INP_MEM - NS2SA( INT_FS_16k, ACELP_LOOK_NS ), A ); +#else +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + // conv params from float to fix + Word16 Q_new; + Q_new = Q_factor_arr( old_inp_12k8, L_INP_12k8 ); /* this Q is applied to both old_inp_12k8 and old_inp_16k below */ + Word16 old_inp_12k8_fx[L_INP_12k8]; + Word16 old_inp_16k_fx[L_INP]; + Word16 A_fx[NB_SUBFR16k * ( M + 1 )]; + floatToFixed_arr( st->old_input_signal, st->old_input_signal_fx, 0, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); + floatToFixed_arr( old_inp_12k8, old_inp_12k8_fx, Q_new, L_INP_12k8 ); + floatToFixed_arr( old_inp_16k, old_inp_16k_fx, Q_new, L_INP ); + floatToFixed_arr( A, A_fx, 12, NB_SUBFR16k * ( M + 1 ) ); + float temp = (float) st->clip_var[0] * ( 2.56f ); + st->clip_var_fx[0] = float_to_fix16( temp, 0 ); + st->clip_var_fx[1] = float_to_fix16( st->clip_var[1], 14 ); + st->clip_var_fx[2] = float_to_fix16( st->clip_var[2], 8 ); + st->clip_var_fx[3] = float_to_fix16( st->clip_var[3], 0 ); + st->clip_var_fx[4] = float_to_fix16( st->clip_var[4], 14 ); + st->clip_var_fx[5] = float_to_fix16( st->clip_var[5], 14 ); + floatToFixed_arr( st->voicing, st->voicing_fx, 15, 3 ); + st->hLPDmem->tilt_code = float_to_fix16( st->hLPDmem->tilt_code_flt, 15 ); + floatToFixed_arr( st->old_Aq_12_8, st->old_Aq_12_8_fx, 12, M + 1 ); + st->old_Es_pred_fx = float_to_fix16( st->old_Es_pred, 8 ); + st->hLPDmem->mem_w0 = float_to_fix16( st->hLPDmem->mem_w0_flt, Q_new - 1 ); + floatToFixed_arr( st->hLPDmem->mem_syn_flt, st->hLPDmem->mem_syn, Q_new - 1, M ); + +#endif + acelp_core_switch_enc_ivas_fx( st, old_inp_12k8_fx + L_INP_MEM - NS2SA( INT_FS_12k8, ACELP_LOOK_NS ), old_inp_16k_fx + L_INP_MEM - 
NS2SA( INT_FS_16k, ACELP_LOOK_NS ), A_fx, 0, Q_new ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + // conv params from fix to float + fixedToFloat_arr( st->old_input_signal_fx, st->old_input_signal, 0, L_FRAME48k + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ); + temp = (float) fix16_to_float( st->clip_var_fx[0], 0 ) / 2.56f; + st->clip_var[0] = temp; + st->clip_var[1] = fix16_to_float( st->clip_var_fx[1], 14 ); + st->clip_var[2] = fix16_to_float( st->clip_var_fx[2], 8 ); + st->clip_var[3] = fix16_to_float( st->clip_var_fx[3], 0 ); + st->clip_var[4] = fix16_to_float( st->clip_var_fx[4], 14 ); + st->clip_var[5] = fix16_to_float( st->clip_var_fx[5], 14 ); + st->hLPDmem->tilt_code_flt = fix16_to_float( st->hLPDmem->tilt_code, 15 ); /* Q15 -> float: write the fixed-point tilt back to the float state instead of re-deriving it from the stale float value */ + st->hLPDmem->mem_w0_flt = fix16_to_float( st->hLPDmem->mem_w0, Q_new - 1 ); +#endif +#endif } + st->hBWE_TD->bwe_non_lin_prev_scale = 0.0; st->hBWE_FD->mem_deemph_old_syn = 0.0f; } diff --git a/lib_enc/fd_cng_enc.c b/lib_enc/fd_cng_enc.c index 4dab9cca66c7517dc92d78ee6fe3436c91911792..325c0bf1c01437d03d9e0dfb3a41d88808561d22 100644 --- a/lib_enc/fd_cng_enc.c +++ b/lib_enc/fd_cng_enc.c @@ -274,152 +274,10 @@ void configureFdCngEnc( assert( !"Unsupported FFT length for FD-based CNG" ); break; } - hsCom->frameSize = hsCom->fftlen >> 1; - - return; -} - -#ifdef IVAS_FLOAT_FIXED -void configureFdCngEnc_ivas_fx( - HANDLE_FD_CNG_ENC hFdCngEnc, /* i/o: Contains the variables related to the FD-based CNG process */ - const Word16 bwidth, - const Word32 total_brate ) -{ - HANDLE_FD_CNG_COM hsCom = hFdCngEnc->hFdCngCom; - Word16 psizeDec[NPART]; - Word16 psizeDec_norm[NPART]; - Word16 psizeDec_norm_exp; - Word16 psize_invDec[NPART]; - - set16_fx( psizeDec, 0, NPART ); - - hsCom->CngBandwidth = bwidth; - move16(); - IF( EQ_16( hsCom->CngBandwidth, FB ) ) - { - hsCom->CngBandwidth = SWB; - move16(); - } - hsCom->CngBitrate = total_brate; - move32(); - - /* NB configuration */ - IF( EQ_16( bwidth, NB ) ) - { - hsCom->FdCngSetup = FdCngSetup_nb; /* PTR 
assignation -> no move needed*/ - move16(); - } - - /* WB configuration */ - ELSE IF( EQ_16( bwidth, WB ) ) - { - /* FFT 6.4kHz, no CLDFB */ - IF( LE_32( total_brate, ACELP_8k00 ) ) - { - hsCom->FdCngSetup = FdCngSetup_wb1; - move16(); - } - /* FFT 6.4kHz, CLDFB 8.0kHz */ - ELSE IF( LE_32( total_brate, ACELP_13k20 ) ) - { - hsCom->FdCngSetup = FdCngSetup_wb2; - move16(); - } - /* FFT 8.0kHz, no CLDFB */ - ELSE - { - hsCom->FdCngSetup = FdCngSetup_wb3; - move16(); - } - } - - /* SWB/FB configuration */ - ELSE - { - /* FFT 6.4kHz, CLDFB 14kHz */ - IF( LE_32( total_brate, ACELP_13k20 ) ) - { - hsCom->FdCngSetup = FdCngSetup_swb1; - move16(); - } - /* FFT 8.0kHz, CLDFB 16kHz */ - ELSE - { - hsCom->FdCngSetup = FdCngSetup_swb2; - move16(); - } - } - hsCom->fftlen = hsCom->FdCngSetup.fftlen; - move16(); - hFdCngEnc->stopFFTbinDec = hsCom->FdCngSetup.stopFFTbin; - move16(); - - /* Configure the SID quantizer and the Confort Noise Generator */ - - hFdCngEnc->startBandDec = hsCom->startBand; - move16(); - hFdCngEnc->stopBandDec = add( hsCom->FdCngSetup.sidPartitions[hsCom->FdCngSetup.numPartitions - 1], 1 ); - move16(); - initPartitions( hsCom->FdCngSetup.sidPartitions, - hsCom->FdCngSetup.numPartitions, - hFdCngEnc->startBandDec, - hFdCngEnc->stopBandDec, - hFdCngEnc->partDec, - &hFdCngEnc->npartDec, - hFdCngEnc->midbandDec, - psizeDec, - psizeDec_norm, - &psizeDec_norm_exp, - psize_invDec, - 0 ); - IF( EQ_16( hFdCngEnc->stopFFTbinDec, 160 ) ) - { - hFdCngEnc->nFFTpartDec = 17; - move16(); - } - ELSE IF( EQ_16( hFdCngEnc->stopFFTbinDec, 256 ) ) - { - hFdCngEnc->nFFTpartDec = 20; - move16(); - } - ELSE - { - hFdCngEnc->nFFTpartDec = 21; - move16(); - } - - switch ( hsCom->fftlen ) - { - case 512: -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - hsCom->fftSineTab_flt = NULL; - hsCom->olapWinAna_flt = olapWinAna512; - hsCom->olapWinSyn_flt = olapWinSyn256; -#endif - hsCom->fftSineTab_fx = NULL; - hsCom->olapWinAna_fx = olapWinAna512_fx; - hsCom->olapWinSyn_fx = olapWinSyn256_fx; - 
break; - case 640: -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - hsCom->fftSineTab_flt = fftSineTab640; - hsCom->olapWinAna_flt = olapWinAna640; - hsCom->olapWinSyn_flt = olapWinSyn320; -#endif - hsCom->fftSineTab_fx = fftSineTab640_fx; - hsCom->olapWinAna_fx = olapWinAna640_fx; - hsCom->olapWinSyn_fx = olapWinSyn320_fx; - break; - default: - assert( !"Unsupported FFT length for FD-based CNG" ); - break; - } hsCom->frameSize = shr( hsCom->fftlen, 1 ); - move16(); return; } -#endif /*-------------------------------------------------------------------* * deleteFdCngEnc() diff --git a/lib_enc/fd_cng_enc_fx.c b/lib_enc/fd_cng_enc_fx.c index 3d133928b847210c66aa9c29c7cf5e0d0282e6bd..a2644946b5c62e1e662841d5ba13095f9a6f42ce 100644 --- a/lib_enc/fd_cng_enc_fx.c +++ b/lib_enc/fd_cng_enc_fx.c @@ -30,6 +30,7 @@ y = u__p; \ } #endif +extern void BASOP_getTables( const PWord16 **ptwiddle, const PWord16 **sin_twiddle, Word16 *psin_step, Word16 length ); /************************************* * Create an instance of type FD_CNG * @@ -266,6 +267,150 @@ void configureFdCngEnc_fx( HANDLE_FD_CNG_ENC hsEnc, /* i/o: Contains the variabl hsCom->frameSize = shr( hsCom->fftlen, 1 ); } +void configureFdCngEnc_ivas_fx( + HANDLE_FD_CNG_ENC hFdCngEnc, /* i/o: Contains the variables related to the FD-based CNG process */ + const Word16 bwidth, + const Word32 total_brate ) +{ + HANDLE_FD_CNG_COM hsCom = hFdCngEnc->hFdCngCom; + Word16 psizeDec[NPART]; + Word16 psizeDec_norm[NPART]; + Word16 psizeDec_norm_exp; + Word16 psize_invDec[NPART]; + + set16_fx( psizeDec, 0, NPART ); + + hsCom->CngBandwidth = bwidth; + move16(); + IF( EQ_16( hsCom->CngBandwidth, FB ) ) + { + hsCom->CngBandwidth = SWB; + move16(); + } + hsCom->CngBitrate = total_brate; + move32(); + + /* NB configuration */ + IF( EQ_16( bwidth, NB ) ) + { + hsCom->FdCngSetup = FdCngSetup_nb; /* PTR assignation -> no move needed*/ + } + + /* WB configuration */ + ELSE IF( EQ_16( bwidth, WB ) ) + { + /* FFT 6.4kHz, no CLDFB */ + IF( LE_32( 
total_brate, ACELP_8k00 ) ) + { + hsCom->FdCngSetup = FdCngSetup_wb1; + } + /* FFT 6.4kHz, CLDFB 8.0kHz */ + ELSE IF( LE_32( total_brate, ACELP_13k20 ) ) + { + hsCom->FdCngSetup = FdCngSetup_wb2; + } + /* FFT 8.0kHz, no CLDFB */ + ELSE + { + hsCom->FdCngSetup = FdCngSetup_wb3; + } + } + + /* SWB/FB configuration */ + ELSE + { + /* FFT 6.4kHz, CLDFB 14kHz */ + IF( LE_32( total_brate, ACELP_13k20 ) ) + { + hsCom->FdCngSetup = FdCngSetup_swb1; + } + /* FFT 8.0kHz, CLDFB 16kHz */ + ELSE + { + hsCom->FdCngSetup = FdCngSetup_swb2; + } + } + hsCom->fftlen = hsCom->FdCngSetup.fftlen; + move16(); + hFdCngEnc->stopFFTbinDec = hsCom->FdCngSetup.stopFFTbin; + move16(); + + /* Configure the SID quantizer and the Confort Noise Generator */ + + hFdCngEnc->startBandDec = hsCom->startBand; + move16(); + hFdCngEnc->stopBandDec = add( hsCom->FdCngSetup.sidPartitions[hsCom->FdCngSetup.numPartitions - 1], 1 ); + move16(); + initPartitions( hsCom->FdCngSetup.sidPartitions, + hsCom->FdCngSetup.numPartitions, + hFdCngEnc->startBandDec, + hFdCngEnc->stopBandDec, + hFdCngEnc->partDec, + &hFdCngEnc->npartDec, + hFdCngEnc->midbandDec, + psizeDec, + psizeDec_norm, + &psizeDec_norm_exp, + psize_invDec, + 0 ); + IF( EQ_16( hFdCngEnc->stopFFTbinDec, 160 ) ) + { + hFdCngEnc->nFFTpartDec = 17; + move16(); + } + ELSE IF( EQ_16( hFdCngEnc->stopFFTbinDec, 256 ) ) + { + hFdCngEnc->nFFTpartDec = 20; + move16(); + } + ELSE + { + hFdCngEnc->nFFTpartDec = 21; + move16(); + } + + SWITCH( hsCom->fftlen ) + { + case 512: +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + hsCom->fftSineTab_flt = NULL; + hsCom->olapWinAna_flt = olapWinAna512; + hsCom->olapWinSyn_flt = olapWinSyn256; +#endif + hsCom->fftSineTab_fx = NULL; + hsCom->olapWinAna_fx = olapWinAna512_fx; + hsCom->olapWinSyn_fx = olapWinSyn256_fx; + hsCom->fftlenShift = 8; + move16(); + hsCom->fftlenFac = 32767 /*1.0 Q15*/; + move16(); + BREAK; + case 640: +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + hsCom->fftSineTab_flt = fftSineTab640; + hsCom->olapWinAna_flt = 
olapWinAna640; + hsCom->olapWinSyn_flt = olapWinSyn320; +#endif + hsCom->fftSineTab_fx = fftSineTab640_fx; + hsCom->olapWinAna_fx = olapWinAna640_fx; + hsCom->olapWinSyn_fx = olapWinSyn320_fx; + hsCom->fftlenShift = 9; + move16(); + hsCom->fftlenFac = 20480 /*0.625 Q15*/; + move16(); + BREAK; + default: + assert( !"Unsupported FFT length for FD-based CNG" ); + BREAK; + } + BASOP_getTables( &hsCom->olapWinAna, NULL, NULL, shr( hsCom->fftlen, 1 ) ); + BASOP_getTables( &hsCom->olapWinSyn, NULL, NULL, shr( hsCom->fftlen, 2 ) ); + hsCom->frameSize = shr( hsCom->fftlen, 1 ); + move16(); + + return; +} + /************************************** * Delete the instance of type FD_CNG * **************************************/ @@ -1547,6 +1692,403 @@ void generate_comfort_noise_enc_fx( Encoder_State *stcod, } } +#ifdef IVAS_FLOAT_FIXED +void generate_comfort_noise_enc_ivas_fx( Encoder_State *stcod, + Word16 Q_new, + Word16 gen_exc ) +{ + Word16 i, s, sn, cnt; + Word16 startBand2; + Word16 stopFFTbin2; + Word16 preemph_fac; + Word32 sqrtNoiseLevel; + Word16 randGaussExp; + Word16 fftBufferExp; + Word16 cngNoiseLevelExp; + Word16 *seed; + Word16 *timeDomainOutput; + Word32 *ptr_r, *ptr_i; + Word32 *cngNoiseLevel; + Word32 *ptr_level; + Word32 *fftBuffer; + Word16 old_syn_pe_tmp[16]; + Word16 tcx_transition = 0; + move16(); + HANDLE_FD_CNG_ENC stenc = stcod->hFdCngEnc; + HANDLE_FD_CNG_COM st = stenc->hFdCngCom; + DTX_ENC_HANDLE hDtxEnc = stcod->hDtxEnc; + TD_CNG_ENC_HANDLE hTdCngEnc = stcod->hTdCngEnc; + + LPD_state_HANDLE hLPDmem = stcod->hLPDmem; + TCX_ENC_HANDLE hTcxEnc = stcod->hTcxEnc; + + /* Warning fix */ + s = 0; + move16(); + + /* pointer initialization */ + + cngNoiseLevel = st->cngNoiseLevel; + cngNoiseLevelExp = st->cngNoiseLevelExp; + move16(); + ptr_level = cngNoiseLevel; + seed = &( st->seed ); + fftBuffer = st->fftBuffer; + timeDomainOutput = st->timeDomainBuffer; + + /* + Generate Gaussian random noise in real and imaginary parts of the FFT bins + Amplitudes are 
adjusted to the estimated noise level cngNoiseLevel in each bin + scaling Gaussian random noise: format Q3.29 + */ + sn = 0; + move16(); + IF( s_and( cngNoiseLevelExp, 1 ) != 0 ) + { + sn = add( sn, 1 ); + cngNoiseLevelExp = add( cngNoiseLevelExp, sn ); + } + + randGaussExp = CNG_RAND_GAUSS_SHIFT; + move16(); + cnt = sub( stenc->stopFFTbinDec, stenc->startBandDec ); + IF( stenc->startBandDec == 0 ) + { + /* DC component in FFT */ + s = 0; + move16(); + sqrtNoiseLevel = Sqrt32( L_shr( *ptr_level, sn ), &s ); + + fftBuffer[0] = L_shl( Mpy_32_32( rand_gauss( seed ), sqrtNoiseLevel ), s ); + move32(); + + /* Nyquist frequency is discarded */ + fftBuffer[1] = L_deposit_l( 0 ); + move32(); + + ptr_level = ptr_level + 1; + ptr_r = fftBuffer + 2; + cnt = sub( cnt, 1 ); + } + ELSE + { + startBand2 = shl( stenc->startBandDec, 1 ); + set32_fx( fftBuffer, 0, startBand2 ); + ptr_r = fftBuffer + startBand2; + } + + sn = add( sn, 1 ); + ptr_i = ptr_r + 1; + FOR( i = 0; i < cnt; i++ ) + { + s = 0; + move16(); + sqrtNoiseLevel = Sqrt32( L_shr( *ptr_level, sn ), &s ); + + /* Real part in FFT bins */ + *ptr_r = L_shl( Mpy_32_32( rand_gauss( seed ), sqrtNoiseLevel ), s ); + move32(); + + /* Imaginary part in FFT bins */ + *ptr_i = L_shl( Mpy_32_32( rand_gauss( seed ), sqrtNoiseLevel ), s ); + move32(); + + ptr_r = ptr_r + 2; + ptr_i = ptr_i + 2; + ptr_level = ptr_level + 1; + } + + /* Remaining FFT bins are set to zero */ + stopFFTbin2 = shl( stenc->stopFFTbinDec, 1 ); + set32_fx( fftBuffer + stopFFTbin2, 0, sub( st->fftlen, stopFFTbin2 ) ); + + fftBufferExp = add( shr( cngNoiseLevelExp, 1 ), randGaussExp ); + + /* If previous frame is active, reset the overlap-add buffer */ + IF( GT_32( stcod->last_core_brate, SID_2k40 ) ) + { + set16_fx( st->olapBufferSynth, 0, st->fftlen ); + test(); + test(); + IF( ( GT_32( stcod->last_core, ACELP_CORE ) && EQ_16( stcod->codec_mode, MODE2 ) ) || EQ_16( stcod->codec_mode, MODE1 ) ) + { + tcx_transition = 1; + move16(); + } + } + + /* Perform STFT 
synthesis */ + SynthesisSTFT_enc_ivas_fx( fftBuffer, fftBufferExp, timeDomainOutput, st->olapBufferSynth, st->olapWinSyn, + tcx_transition, st, gen_exc, &Q_new, -1, -1 ); + IF( hTdCngEnc != NULL ) + { + Word32 Lener, att; + Word16 exp; + /* update CNG excitation energy for LP_CNG */ + + /* calculate the residual signal energy */ + /*enr = dotp( st->exc_cng, st->exc_cng, st->frameSize ) / st->frameSize;*/ + Lener = Dot_productSq16HQ( 1, st->exc_cng, stcod->L_frame, &exp ); + exp = add( sub( shl( sub( 15, Q_new ), 1 ), 8 ), exp ); /*8 = log2(256)*/ + + /* convert log2 of residual signal energy */ + /*(float)log10( enr + 0.1f ) / (float)log10( 2.0f );*/ + Lener = BASOP_Util_Log2( Lener ); + Lener = L_add( Lener, L_shl( L_deposit_l( exp ), WORD32_BITS - 1 - LD_DATA_SCALE ) ); /*Q25*/ + if ( EQ_16( stcod->L_frame, L_FRAME16k ) ) + { + Lener = L_sub( Lener, 10802114l /*0.3219280949f Q25*/ ); /*log2(320) = 8.3219280949f*/ + } + /* decrease the energy in case of WB input */ + IF( NE_16( stcod->bwidth, NB ) ) + { + IF( EQ_16( stcod->bwidth, WB ) ) + { + IF( hDtxEnc->CNG_mode >= 0 ) + { + /* Bitrate adapted attenuation */ + att = L_shl( L_deposit_l( ENR_ATT_fx[hDtxEnc->CNG_mode] ), 17 ); + } + ELSE + { + /* Use least attenuation for higher bitrates */ + att = L_shl( L_deposit_l( ENR_ATT_fx[4] ), 17 ); + } + } + ELSE + { + att = 384 << 17; + move32(); /*1.5 Q8<<17=Q25*/ + } + Lener = L_sub( Lener, att ); + } + /*stdec->lp_ener = 0.8f * stcod->lp_ener + 0.2f * pow( 2.0f, enr );*/ + Lener = BASOP_util_Pow2( Lener, 6, &exp ); + Lener = Mult_32_16( Lener, 6554 /*0.2f Q15*/ ); + exp = sub( 25, exp ); + Lener = L_shr( Lener, exp ); /*Q6*/ + hTdCngEnc->lp_ener_fx = L_add( Mult_32_16( hTdCngEnc->lp_ener_fx, 26214 /*0.8f Q15*/ ), Lener ); /*Q6*/ + move32(); + } + + /* Overlap-add when previous frame is active */ + test(); + IF( ( GT_32( stcod->last_core_brate, SID_2k40 ) ) && ( EQ_16( stcod->codec_mode, MODE2 ) ) ) + { + Word32 old_exc_ener, gain, noise32; + Word16 seed_loc, lpcorder, 
old_syn, tmp, gain16, N, N2, N4, N8; + Word16 old_exc_ener_exp, gain_exp; + Word16 normFacE, normShiftE, normShiftEM1; + Word16 normFacG, normShiftG, normShiftGM1; + Word16 noiseExp, *old_exc, old_Aq[M + 1], *old_syn_pe; + Word16 noise[640], normShiftP2; + Word16 Q_exc, Q_syn; + + + assert( st->frameSize <= 640 ); + + seed_loc = st->seed; + move16(); + N = st->frameSize; + move16(); + N2 = shr( st->frameSize, 1 ); + + IF( GT_16( stcod->last_core, ACELP_CORE ) ) + { + Word16 left_overlap_mode; + left_overlap_mode = stcod->hTcxCfg->tcx_last_overlap_mode; + move16(); + if ( EQ_16( left_overlap_mode, ALDO_WINDOW ) ) + { + left_overlap_mode = FULL_OVERLAP; + move16(); + } + + tcx_windowing_synthesis_current_frame( timeDomainOutput, + stcod->hTcxCfg->tcx_mdct_window, /*Keep sine windows for limiting Time modulation*/ + stcod->hTcxCfg->tcx_mdct_window_half, + stcod->hTcxCfg->tcx_mdct_window_minimum, + stcod->hTcxCfg->tcx_mdct_window_length, + stcod->hTcxCfg->tcx_mdct_window_half_length, + stcod->hTcxCfg->tcx_mdct_window_min_length, + 0, + left_overlap_mode, + NULL, + NULL, + NULL, + NULL, + NULL, + N / 2, + shr( sub( abs_s( stcod->hTcxCfg->tcx_offset ), stcod->hTcxCfg->tcx_offset ), 1 ), /* equivalent to: stdec->hTcxCfg->tcx_offset<0?-stdec->hTcxCfg->tcx_offset:0 */ + 1, + 0, + 0 ); + + IF( stcod->hTcxCfg->last_aldo != 0 ) + { + FOR( i = 0; i < st->frameSize; i++ ) + { + timeDomainOutput[i] = add( timeDomainOutput[i], shr_r( hTcxEnc->old_out_fx[i + NS2SA( stcod->sr_core, N_ZERO_MDCT_NS )], hTcxEnc->Q_old_out ) ); + move16(); + } + } + ELSE + { + tcx_windowing_synthesis_past_frame( hTcxEnc->Txnq, + stcod->hTcxCfg->tcx_aldo_window_1_trunc, + stcod->hTcxCfg->tcx_mdct_window_half, + stcod->hTcxCfg->tcx_mdct_window_minimum, + stcod->hTcxCfg->tcx_mdct_window_length, + stcod->hTcxCfg->tcx_mdct_window_half_length, + stcod->hTcxCfg->tcx_mdct_window_min_length, + stcod->hTcxCfg->tcx_last_overlap_mode ); + + FOR( i = 0; i < N2; i++ ) + { + timeDomainOutput[i] = add( 
timeDomainOutput[i], shl( hTcxEnc->Txnq[i], TCX_IMDCT_HEADROOM ) ); + move16(); + } + } + } + ELSE + { + + /* + - the scaling of the LPCs (e.g. old_Aq) is always Q12 (encoder or decoder) + + - the scaling of the deemphasized signals (e.g. old_syn) is always Q0 (encoder or decoder) + + - the scaling of the excitation signals in the encoder (e.g. old_exc) is Q_new + - the scaling of the preemphasized signals in the encoder (e.g. old_syn_pe) is Q_new-1 + + - the scaling of the excitation signals in the decoder (e.g. old_exc) is Q_exc (or stdec->Q_exc) + - the scaling of the preemphasized signals in the decoder (e.g. old_syn_pe) is Q_syn (or stdec->Q_syn) + */ + + lpcorder = M; + move16(); + E_LPC_f_lsp_a_conversion( stcod->lsp_old_fx, old_Aq, M ); + old_exc = hLPDmem->old_exc + sub( L_EXC_MEM, N2 ); + old_syn_pe = hLPDmem->mem_syn2; + old_syn = hLPDmem->syn[lpcorder]; + move16(); + preemph_fac = stcod->preemph_fac; + move16(); + Q_exc = Q_new; + Q_syn = sub( Q_new, 1 ); + + /* shift to be in the range of values supported by getNormReciprocalWord16() */ + N8 = shr( N2, CNG_NORM_RECIPROCAL_RANGE_SHIFT ); + + assert( N2 == ( N8 << CNG_NORM_RECIPROCAL_RANGE_SHIFT ) ); + + normFacE = getNormReciprocalWord16( N8 ); + normShiftE = BASOP_util_norm_s_bands2shift( N8 ); + normShiftEM1 = sub( normShiftE, 1 ); + normShiftP2 = add( normShiftE, CNG_NORM_RECIPROCAL_RANGE_SHIFT ); + + old_exc_ener = L_shr( L_mult( old_exc[0], old_exc[0] ), normShiftP2 ); + FOR( i = 1; i < N2; i++ ) + { + old_exc_ener = L_add( old_exc_ener, L_shr( L_mult( old_exc[i], old_exc[i] ), normShiftP2 ) ); + } + old_exc_ener = L_shl( Mpy_32_16_1( old_exc_ener, shl( normFacE, normShiftEM1 ) ), 1 ); + + old_exc_ener_exp = 0; + move16(); + old_exc_ener = Sqrt32( old_exc_ener, &old_exc_ener_exp ); + old_exc_ener_exp = add( old_exc_ener_exp, ( sub( 15, Q_exc ) ) ); + + /* shift to be in the range of values supported by getNormReciprocalWord16() */ + N4 = shr( N, CNG_NORM_RECIPROCAL_RANGE_SHIFT ); + + assert( N == ( 
N4 << CNG_NORM_RECIPROCAL_RANGE_SHIFT ) ); + + normFacG = getNormReciprocalWord16( N4 ); + normShiftG = BASOP_util_norm_s_bands2shift( N4 ); + normShiftGM1 = sub( normShiftG, 1 ); + normShiftP2 = add( normShiftG, CNG_NORM_RECIPROCAL_RANGE_SHIFT ); + + gain = L_deposit_l( 0 ); + FOR( i = 0; i < N; i++ ) + { + noise32 = rand_gauss( &seed_loc ); + noise[i] = extract_h( noise32 ); + move16(); + gain = L_add( gain, L_shr( L_mult( noise[i], noise[i] ), normShiftP2 ) ); + } + gain = L_shl( Mpy_32_16_1( gain, shl( normFacG, normShiftGM1 ) ), 1 ); + + gain_exp = 2 * CNG_RAND_GAUSS_SHIFT; + move16(); + gain = ISqrt32( gain, &gain_exp ); + + gain = Mpy_32_32( old_exc_ener, gain ); + gain16 = extract_h( gain ); + + gain_exp = add( old_exc_ener_exp, gain_exp ); + noiseExp = add( CNG_RAND_GAUSS_SHIFT, gain_exp ); + + s = sub( 15 - NOISE_HEADROOM, noiseExp ); + FOR( i = 0; i < N; i++ ) + { +#ifdef BASOP_NOGLOB + noise[i] = shr_sat( mult( noise[i], gain16 ), s ); +#else + noise[i] = shr( mult( noise[i], gain16 ), s ); +#endif + move16(); + } + + assert( lpcorder <= 16 ); + + s = sub( 15 - NOISE_HEADROOM, ( sub( 15, Q_syn ) ) ); + FOR( i = 0; i < lpcorder; i++ ) + { +#ifdef BASOP_NOGLOB + old_syn_pe_tmp[i] = shr_sat( old_syn_pe[i], s ); +#else + old_syn_pe_tmp[i] = shr( old_syn_pe[i], s ); +#endif + move16(); + } + + E_UTIL_synthesis( + 0, /* i : scaling to apply for a[0] Q0 */ + old_Aq, /* i : LP filter coefficients Q12 */ + noise, /* i : input signal Qx */ + noise, /* o : output signal Qx-s */ + N, /* i : size of filtering Q0 */ + old_syn_pe_tmp, /* i/o: memory associated with this filtering. Q0 */ + 0, /* i : 0=no update, 1=update of memory. 
Q0 */ + lpcorder /* i : order of LP filter Q0 */ + ); + + tmp = old_syn; + move16(); + + E_UTIL_deemph2( + NOISE_HEADROOM, + noise, /* I/O: signal Qx */ + preemph_fac, /* I: deemphasis factor Qx */ + N, /* I: vector size */ + &tmp /* I/O: memory (signal[-1]) Qx */ + ); + + FOR( i = 0; i < N4; i++ ) + { + tmp = mult( noise[i], st->olapWinSyn[i].v.re ); + timeDomainOutput[i] = add( timeDomainOutput[i], tmp ); + move16(); + tmp = mult( noise[i + N4], st->olapWinSyn[N4 - 1 - i].v.im ); + timeDomainOutput[i + N4] = add( timeDomainOutput[i + N4], tmp ); + move16(); + } + } + } +} +#endif + /*-------------------------------------------------------------------* * cng_energy_fx() * diff --git a/lib_enc/find_tilt.c b/lib_enc/find_tilt.c index 0f64f8888a6b455ab96220f966b08974fba764fe..36a97e346a494181ce850a2bcebc701343176ab2 100644 --- a/lib_enc/find_tilt.c +++ b/lib_enc/find_tilt.c @@ -41,6 +41,12 @@ #include "prot.h" #include "wmc_auto.h" +#ifdef IVAS_FLOAT_FIXED +#include "prot_fx.h" /* Function prototypes */ +#include "prot_fx_enc.h" /* Function prototypes */ +#endif // IVAS_FLOAT_FIXED + + /*---------------------------------------------------------------------* * Local constants *---------------------------------------------------------------------*/ @@ -55,6 +61,275 @@ * * Find LF/HF energy ratio *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +void find_tilt_ivas_fx( + const Word32 fr_bands[], /* i : energy in frequency bands Q_new*/ + const Word32 bckr[], /* i : per band background noise energy estimate Q_new*/ + Word32 ee[2], /* o : lf/hf E ration for present frame Q6*/ + const Word16 pitch[3], /* i : open loop pitch values for 3 half-frames Q0*/ + const Word16 voicing[3], /* i : normalized correlation for 3 half-frames Q15*/ + const Word32 *lf_E, /* i : per bin energy for low frequencies Q_new - 2*/ + const Word16 corr_shift, /* i : normalized correlation correction Q15*/ + const Word16 bwidth, /* i : input signal 
bandwidth */ + const Word16 max_band, /* i : maximum critical band */ + Word32 hp_E[], /* o : energy in HF Q_new*/ + const Word16 codec_mode, /* i : MODE1 or MODE2 */ + const Word16 Q_new, /* i : scaling factor */ + Word32 *bckr_tilt_lt /* i/o: lf/hf E ratio of background noise Q16 */ + , + Word16 Opt_vbr_mode ) +{ + Word32 lp_bckr = 0, hp_bckr = 0, lp_E, Ltmp; + const Word32 *pt_E, *pt_bands, *pt_bckr, *hf_bands, *tmp_E; + Word16 tmp, freq, f0, f1, f2, mean_voi, bin; + Word16 i, nb_bands; + Word16 e_tmp, m_tmp; + Word16 m_Fs, e_Fs; + Word16 m_cnt, e_cnt; + Word16 m_hpE, e_hpE; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; + move32(); +#endif + move32(); + move32(); + /*-----------------------------------------------------------------* + * Initializations + *-----------------------------------------------------------------*/ + + IF( NE_16( bwidth, NB ) ) + { + /* WB processing */ + bin = BIN4_FX; + move16(); /* First useful frequency bin ~ 50 Hz */ + pt_bands = fr_bands; + tmp_E = lf_E; + pt_bckr = bckr; + nb_bands = 10; + move16(); + } + ELSE + { + /* NB processing */ + bin = add( shl( BIN4_FX, 1 ), BIN4_FX ); /* First useful frequency bin ~ 150 Hz */ + pt_bands = fr_bands + 1; /* Exlcude 1st critical band */ + tmp_E = lf_E + 2; /* Start at the 3rd bin (150 Hz) */ + pt_bckr = bckr + 1; /* Exlcude 1st critical band */ + nb_bands = 9; + move16(); /* Nb. 
of "low" frequency bands taken into account in NB processing */ + } + + /*-----------------------------------------------------------------* + * Find spectrum tilt + *-----------------------------------------------------------------*/ + + pt_E = tmp_E; /* Point at the 1st useful element of the per-bin energy vector */ + hf_bands = fr_bands; + + /* bckr + voicing */ + /*lp_bckr = mean( pt_bckr, nb_bands );*/ /* estimated noise E in first critical bands, up to 1270 Hz */ + lp_bckr = Mean32( pt_bckr, nb_bands ); + /*hp_bckr = 0.5f * (bckr[max_band-1] + bckr[max_band]);*/ /* estimated noise E in last 2 critical bands */ +#ifdef BASOP_NOGLOB + hp_bckr = L_shr( L_add_sat( bckr[max_band - 1], bckr[max_band] ), 1 ); +#else + hp_bckr = L_shr( L_add( bckr[max_band - 1], bckr[max_band] ), 1 ); +#endif + if ( hp_bckr == 0 ) /* Avoid division by zero. */ + { + hp_bckr = L_deposit_l( 1 ); + } + Ltmp = BASOP_Util_Divide3232_Scale_cadence( lp_bckr, hp_bckr, &e_tmp ); + Ltmp = Mpy_32_16_r( Ltmp, 3277 ); + Ltmp = L_shr_sat( Ltmp, sub( 15, e_tmp ) ); + *bckr_tilt_lt = L_add( Mpy_32_16_r( *bckr_tilt_lt, 29491 ), Ltmp ); + move32(); + + test(); + IF( EQ_16( codec_mode, MODE2 ) || Opt_vbr_mode == 1 ) + { + /*lp_bckr *= FACT;*/ + /*hp_bckr *= FACT;*/ +#ifdef BASOP_NOGLOB + lp_bckr = L_add_sat( L_shl_sat( lp_bckr, 1 ), lp_bckr ); + hp_bckr = L_add_sat( L_shl_sat( hp_bckr, 1 ), hp_bckr ); +#else + lp_bckr = L_add( L_shl( lp_bckr, 1 ), lp_bckr ); + hp_bckr = L_add( L_shl( hp_bckr, 1 ), hp_bckr ); +#endif + } + /*mean_voi = 0.5f * (voicing[1] + voicing[2]) + corr_shift;*/ + Ltmp = L_mult( voicing[1], 16384 ); + Ltmp = L_mac( Ltmp, voicing[2], 16384 ); +#ifdef BASOP_NOGLOB + Ltmp = L_mac_o( Ltmp, corr_shift, 32767, &Overflow ); + mean_voi = round_fx_o( Ltmp, &Overflow ); +#else + Ltmp = L_mac( Ltmp, corr_shift, 32767 ); + mean_voi = round_fx( Ltmp ); +#endif + + /*f0 = INT_FS_FX / pitch[2];*/ + e_tmp = norm_s( pitch[2] ); + m_tmp = shl( pitch[2], e_tmp ); + + m_Fs = div_s( INT_FS_FX, m_tmp ); 
+ e_Fs = sub( 15, e_tmp ); + f0 = shr( m_Fs, sub( e_Fs, 4 ) ); /* Q4 */ + + FOR( i = 0; i < 2; i++ ) + { + /*hp_E[i] = 0.5f * (hf_bands[max_band-1] + hf_bands[max_band]) - hp_bckr; */ /* averaged E in last 2 critical bands */ + Ltmp = L_add( L_shr( hf_bands[max_band - 1], 1 ), L_shr( hf_bands[max_band], 1 ) ); + hp_E[i] = L_sub( Ltmp, hp_bckr ); + move32(); + IF( Opt_vbr_mode == 0 ) + { + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( hp_E[i], sub( 31, Q_new ), E_MIN_IVAS_FX, 31 - Q19 ), -1 ) ) + { + hp_E[i] = L_shl( E_MIN_IVAS_FX, sub( Q_new, Q19 ) ); + move32(); + } + } + ELSE + { + hp_E[i] = L_max( hp_E[i], L_shl( 1, Q_new ) ); + move32(); + } + + test(); + IF( GT_16( mean_voi, TH_COR_FX ) && LT_16( pitch[2], TH_PIT_FX ) ) /* High-pitched voiced frames */ + { + freq = bin; + move16(); /* 1st useful frequency bin */ + m_cnt = 0; + move16(); + lp_E = L_deposit_l( 0 ); + + f1 = add( shr( f0, 1 ), f0 ); /* Middle between 2 harmonics */ + f2 = f0; + move16(); + WHILE( LE_16( freq, 20320 ) ) /* End frequency of 10th critical band */ + { + FOR( ; freq <= f1; freq += BIN4_FX ) + { + /* include only bins sufficiently close to harmonics */ + tmp = sub( freq, f2 ); + IF( L_mac0( -(Word32) TH_D_FX * TH_D_FX, tmp, tmp ) < 0 ) + { +#ifdef BASOP_NOGLOB + lp_E = L_add_o( *pt_E, lp_E, &Overflow ); +#else + lp_E = L_add( *pt_E, lp_E ); +#endif + m_cnt = add( m_cnt, 1 ); + } + pt_E++; + } +#ifdef BASOP_NOGLOB + f1 = add_o( f1, f0, &Overflow ); + f2 = add_o( f2, f0, &Overflow ); +#else + f1 = add( f1, f0 ); + f2 = add( f2, f0 ); +#endif + } + /*lp_E = lp_E / (float)cnt - lp_bckr;*/ + e_tmp = sub( norm_l( lp_E ), 1 ); + m_tmp = extract_h( L_shl( lp_E, e_tmp ) ); + + e_tmp = sub( e_tmp, 2 ); /* lf_e divided by 4 in anal_sp */ + + e_cnt = norm_s( m_cnt ); + m_cnt = shl( m_cnt, e_cnt ); + + m_tmp = div_s( m_tmp, m_cnt ); + e_tmp = sub( e_tmp, e_cnt ); + +#ifdef BASOP_NOGLOB + lp_E = L_sub_o( L_shr_o( m_tmp, sub( e_tmp, 1 ), &Overflow ), lp_bckr, &Overflow ); +#else + lp_E = L_sub( L_shr( m_tmp, 
sub( e_tmp, 1 ) ), lp_bckr ); +#endif + + pt_E = tmp_E + VOIC_BINS; /* Update for next half-frame */ + } + ELSE /* Other than high-pitched voiced frames */ + { + /*lp_E = mean( pt_bands, nb_bands ) - lp_bckr;*/ /* averaged E in first critical bands, up to 1270 Hz */ + lp_E = L_sub( Mean32( pt_bands, nb_bands ), lp_bckr ); + } + IF( Opt_vbr_mode == 0 ) + { + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( lp_E, sub( 31, Q_new ), E_MIN_IVAS_FX, 31 - Q19 ), -1 ) ) + { + lp_E = L_shl( E_MIN_IVAS_FX, sub( Q_new, Q19 ) ); + } + } + ELSE + { + lp_E = L_max( lp_E, 0 ); + } + /*ee[i] = lp_E / hp_E[i];*/ /* LF/HF ratio */ + test(); + IF( lp_E != 0 && hp_E[i] != 0 ) + { + e_tmp = sub( norm_l( lp_E ), 1 ); + m_tmp = extract_h( L_shl( lp_E, e_tmp ) ); + e_hpE = norm_l( hp_E[i] ); + m_hpE = extract_h( L_shl( hp_E[i], e_hpE ) ); + m_tmp = div_s( m_tmp, m_hpE ); + e_tmp = sub( e_tmp, e_hpE ); + +#ifdef BASOP_NOGLOB + ee[i] = L_shr_o( m_tmp, add( e_tmp, 15 - 6 ), &Overflow ); /* ee in Q6 */ +#else /* BASOP_NOGLOB */ + ee[i] = L_shr( m_tmp, add( e_tmp, 15 - 6 ) ); /* ee in Q6 */ +#endif + move32(); + } + ELSE IF( lp_E == 0 ) + { + ee[i] = L_deposit_l( 0 ); + move32(); + } + ELSE + { + ee[i] = MAX_32; + move32(); + } + + IF( EQ_16( bwidth, NB ) ) /* For NB input, compensate for the missing bands */ + { +#ifdef BASOP_NOGLOB + Ltmp = L_shl_o( ee[i], 3, &Overflow ); +#else + Ltmp = L_shl( ee[i], 3 ); +#endif + IF( EQ_32( Ltmp, MAX_32 ) ) /* if Overflow: Compute with less precision */ + { + Ltmp = Mult_32_16( ee[i], 24576 ); /* 6/8 */ +#ifdef BASOP_NOGLOB + ee[i] = L_shl_sat( Ltmp, 3 ); +#else + ee[i] = L_shl( Ltmp, 3 ); +#endif + move32(); /* x8 */ + } + ELSE + { + ee[i] = Mult_32_16( Ltmp, 24576 ); + move32(); /* 6/8 */ + } + } + + pt_bands += NB_BANDS; /* Update for next half-frame */ + hf_bands += NB_BANDS; + } + + return; +} +#endif // IVAS_FLOAT_FIXED void find_tilt( const float fr_bands[], /* i : energy in frequency bands */ diff --git a/lib_enc/find_uv.c b/lib_enc/find_uv.c index 
f48d1fe70157db039e9c0d18afe7895a18906516..7011112b4c16a6c5d56f306f5150264eff6a04ab 100644 --- a/lib_enc/find_uv.c +++ b/lib_enc/find_uv.c @@ -41,6 +41,11 @@ #include "prot.h" #include "wmc_auto.h" +#ifdef IVAS_FLOAT_FIXED +#include "prot_fx.h" /* Function prototypes */ +#include "prot_fx_enc.h" /* Function prototypes */ +#endif // IVAS_FLOAT_FIXED + /*-------------------------------------------------------------------* * Local constants *-------------------------------------------------------------------*/ @@ -97,8 +102,667 @@ static float find_ener_decrease( return dE2; } +#ifdef IVAS_FLOAT_FIXED +static Word16 find_ener_decrease_fx( /* o : maximum energy ratio Q10 */ + const Word16 ind_deltaMax, /* i : index of the beginning of maximum energy search */ + const Word32 *pt_enr_ssf /* i : Pointer to the energy buffer */ +) +{ + Word16 i, j, end, flag; + Word16 wtmp0, wtmp1; + Word32 maxEnr, minEnr; + Word16 dE2, exp0, exp1; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; + move32(); +#endif + dE2 = 0; + move16(); + + j = ind_deltaMax + 2; + move16(); + end = j + L_ENR; + move16(); + maxEnr = L_add( pt_enr_ssf[j], 0 ); + j = add( j, 1 ); + flag = 0; + move16(); + FOR( i = j; i < end; i++ ) + { + test(); + IF( ( GT_32( pt_enr_ssf[i], maxEnr ) ) && ( flag == 0 ) ) + { + maxEnr = L_add( pt_enr_ssf[i], 0 ); /*Q0*/ + j = add( j, 1 ); + } + ELSE + { + flag = 1; + move16(); + } + } + + minEnr = L_add( maxEnr, 0 ); + FOR( i = j; i < end; i++ ) + { + minEnr = L_min( minEnr, pt_enr_ssf[i] ); + } + + +#ifdef BASOP_NOGLOB + minEnr = L_add_sat( minEnr, 100000 ); +#else + minEnr = L_add( minEnr, 100000 ); +#endif + exp0 = norm_l( minEnr ); + wtmp0 = extract_h( L_shl( minEnr, exp0 ) ); + exp1 = sub( norm_l( maxEnr ), 1 ); + wtmp1 = extract_h( L_shl( maxEnr, exp1 ) ); + wtmp1 = div_s( wtmp1, wtmp0 ); +#ifdef BASOP_NOGLOB + dE2 = shr_ro( wtmp1, add( sub( exp1, exp0 ), 15 - 10 ), &Overflow ); /*Q10*/ +#else + dE2 = shr_r( wtmp1, add( sub( exp1, exp0 ), 15 - 10 ) ); /*Q10*/ 
+#endif + + return dE2; +} +/*-------------------------------------------------------------------* + * find_uv() + * + * Decision about coder type + *-------------------------------------------------------------------*/ +Word16 find_uv_ivas_fx( /* o : coding type */ + Encoder_State *st_fx, /* i/o: encoder state structure */ + const Word16 *T_op_fr, /* i : pointer to adjusted fractional pitch (4 val.) Q6 */ + const Word16 *voicing_fr, /* i : refined correlation for each subframes Q15 */ + const Word16 *speech, /* i : pointer to speech signal for E computation Q_new */ + const Word32 *ee, /* i : lf/hf Energy ratio for present frame Q6 */ + Word32 *dE1X, /* o : sudden energy increase for S/M classifier */ + const Word16 corr_shift, /* i : normalized correlation correction in noise Q15 */ + const Word16 relE, /* i : relative frame energy Q8 */ + const Word16 Etot, /* i : total energy Q8 */ + const Word32 hp_E[], /* i : energy in HF q_hp_E */ + Word16 *flag_spitch, /* i/o: flag to indicate very short stable pitch and high correlation */ + const Word16 last_core_orig, /* i : original last core */ + STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */ + const Word16 Q_new, + const Word16 q_hp_E ) +{ + Word16 coder_type, i; + Word32 mean_ee, dE1, fac_32; + const Word16 *pt_speech; + Word32 L_tmp, enr_ssf[2 * NB_SSF + 2 * NB_SSF + 2], E_min_th; + Word16 dE2; + Word16 ind_deltaMax, tmp_offset_flag; + Word32 Ltmp0, *pt_enr_ssf, *pt_enr_ssf1, dE2_th; + Word16 exp0, exp1; + Word16 wtmp0, wtmp1; + Word16 fac, mean_voi3, dE3; + Word16 relE_thres; + Word16 mean_voi3_offset; + Word16 voicing_m, dpit1, dpit2, dpit3; + Word16 ee0_th, ee1_th, voi_th, nb_cond, flag_low_relE; + NOISE_EST_HANDLE hNoiseEst = st_fx->hNoiseEst; + SC_VBR_ENC_HANDLE hSC_VBR = st_fx->hSC_VBR; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; + move32(); +#endif + Word16 Last_Resort; + Word16 vadnoise; + + IF( hSC_VBR != NULL ) + { + Last_Resort = hSC_VBR->Last_Resort; + move16(); 
+ vadnoise = hSC_VBR->vadnoise_fx; + move16(); + } + ELSE + { + Last_Resort = 0; + move16(); + vadnoise = 0; + move16(); + } + + /*-----------------------------------------------------------------* + * Detect sudden energy increases to catch voice and music + * temporal events (dE1) + * + * - Find maximum energy per short subblocks. + * Two subblock sets are used shifted by half the subblock length + * - Find maximum energy ratio between adjacent subblocks + *-----------------------------------------------------------------*/ + + /* Find maximum energy per short subblocks */ + pt_speech = speech - SSF; + pt_enr_ssf = enr_ssf + 2 * NB_SSF; + FOR( i = 0; i < 2 * ( NB_SSF + 1 ); i++ ) + { + emaximum_fx( Q_new, pt_speech, SSF, pt_enr_ssf ); + pt_speech += ( SSF / 2 ); + pt_enr_ssf++; + } + + dE1 = 0; + move16(); + ind_deltaMax = 0; + move16(); + pt_enr_ssf = enr_ssf + 2 * NB_SSF; + pt_enr_ssf1 = pt_enr_ssf + 2; + + /* Test on energy increase between adjacent sub-subframes */ + exp1 = 0; + move16(); + FOR( i = 0; i < 2 * NB_SSF; i++ ) + { + /*fac = *pt_enr_ssf1 / (*pt_enr_ssf + 1);*/ + Ltmp0 = L_max( *pt_enr_ssf, 1 ); + exp0 = norm_l( Ltmp0 ); + wtmp0 = extract_h( L_shl( Ltmp0, exp0 ) ); + exp1 = sub( norm_l( *pt_enr_ssf1 ), 1 ); + wtmp1 = extract_h( L_shl( *pt_enr_ssf1, exp1 ) ); + fac = div_s( wtmp1, wtmp0 ); +#ifdef BASOP_NOGLOB + fac_32 = L_shr_o( L_deposit_l( fac ), add( sub( exp1, exp0 ), 15 - 13 ), &Overflow ); /* fac32 in Q13*/ +#else /* BASOP_NOGLOB */ + fac_32 = L_shr( L_deposit_l( fac ), add( sub( exp1, exp0 ), 15 - 13 ) ); /* fac32 in Q13*/ +#endif /* BASOP_NOGLOB */ + + if ( GT_32( fac_32, dE1 ) ) + { + ind_deltaMax = i; + move16(); + } + + dE1 = L_max( dE1, fac_32 ); + + pt_enr_ssf++; + pt_enr_ssf1++; + } + IF( hStereoClassif != NULL ) + { + IF( st_fx->idchan == 0 ) + { + hStereoClassif->dE1_ch1_fx = dE1; + move32(); + hStereoClassif->dE1_ch1_e = 31 - Q13; + move16(); + } + ELSE + { + hStereoClassif->dE1_ch2_fx = dE1; + move32(); + 
hStereoClassif->dE1_ch2_e = 31 - Q13; + move16(); + } + } + + if ( dE1X != NULL ) + { + *dE1X = dE1; + move32(); + } + + /*-----------------------------------------------------------------* + * Average spectral tilt + * Average voicing (normalized correlation) + *-----------------------------------------------------------------*/ + + /*mean_ee = 1.0f/3.0f * (st->ee_old + ee[0] + ee[1]); */ /* coefficients take into account the position of the window */ +#ifdef BASOP_NOGLOB + mean_ee = L_add_o( L_add_o( st_fx->ee_old_fx, ee[0], &Overflow ), ee[1], &Overflow ); +#else /* BASOP_NOGLOB */ + mean_ee = L_add( L_add( st_fx->ee_old_fx, ee[0] ), ee[1] ); +#endif /* BASOP_NOGLOB */ + mean_ee = Mult_32_16( mean_ee, 10923 ); /*Q6*/ + + /* mean_voi3 = 1.0f/3.0f * (voicing[0] + voicing[1] + voicing[2]);*/ + Ltmp0 = L_mult( st_fx->voicing_fx[0], 10923 ); + Ltmp0 = L_mac( Ltmp0, st_fx->voicing_fx[1], 10923 ); +#ifdef BASOP_NOGLOB // -dtx 12650 amrwb\Dtx3.INP + mean_voi3 = mac_r_sat( Ltmp0, st_fx->voicing_fx[2], 10923 ); /*Q15*/ +#else + mean_voi3 = mac_r( Ltmp0, st_fx->voicing_fx[2], 10923 ); /*Q15*/ +#endif + /*-----------------------------------------------------------------* + * Total frame energy difference (dE3) + *-----------------------------------------------------------------*/ + + dE3 = sub( Etot, hNoiseEst->Etot_last_fx ); /*Q8*/ + + /*-----------------------------------------------------------------* + * Energy decrease after spike (dE2) + *-----------------------------------------------------------------*/ + + /* set different thresholds and conditions for NB and WB input */ + dE2_th = 30 << 10; + move32(); + nb_cond = 1; + move16(); /* no additional condition for WB input */ + IF( EQ_16( st_fx->input_bwidth, NB ) ) + { + dE2_th = 21 << 10; + move32(); +#ifdef BASOP_NOGLOB + if ( GE_16( add_o( mean_voi3, corr_shift, &Overflow ), 22282 ) ) /*( mean_voi3 + corr_shift ) >= 0.68f*/ +#else + if ( GE_16( add( mean_voi3, corr_shift ), 22282 ) ) /*( mean_voi3 + corr_shift ) 
>= 0.68f*/ +#endif + { + nb_cond = 0; + move16(); + } + } + + /* calcualte maximum energy decrease */ + dE2 = 0; + move16(); /* Test on energy decrease after an energy spike */ + pt_enr_ssf = enr_ssf + 2 * NB_SSF; + + test(); + IF( GT_32( dE1, 30 << 13 ) && nb_cond ) /*>30 Q13*/ + { + IF( LT_16( sub( shl( NB_SSF, 1 ), ind_deltaMax ), L_ENR ) ) + { + st_fx->old_ind_deltaMax = ind_deltaMax; + move16(); + Copy32( pt_enr_ssf, st_fx->old_enr_ssf_fx, 2 * NB_SSF ); + } + ELSE + { + st_fx->old_ind_deltaMax = -1; + move16(); + dE2 = find_ener_decrease_fx( ind_deltaMax, pt_enr_ssf ); /*Q10*/ + + if ( GT_32( dE2, dE2_th ) ) + { + st_fx->spike_hyst = 0; + move16(); + } + } + } + ELSE + { + IF( st_fx->old_ind_deltaMax >= 0 ) + { + Copy32( st_fx->old_enr_ssf_fx, enr_ssf, 2 * NB_SSF ); + dE2 = find_ener_decrease_fx( st_fx->old_ind_deltaMax, enr_ssf ); + + if ( GT_32( dE2, dE2_th ) ) + { + st_fx->spike_hyst = 1; + move16(); + } + } + + st_fx->old_ind_deltaMax = -1; + move16(); + } + + /*-----------------------------------------------------------------* + * Detection of voiced offsets (tmp_offset_flag) + *-----------------------------------------------------------------*/ + + tmp_offset_flag = 1; + move16(); + + IF( NE_16( st_fx->input_bwidth, NB ) ) + { + ee0_th = 154; /*2.4 in Q6 */ + move16(); + voi_th = 24248; /*0.74f Q15 */ + move16(); + } + ELSE + { + ee0_th = 627; /*9.8f Q6 */ + move16(); + voi_th = 24904; /*0.76f Q15*/ + move16(); + } + + E_min_th = L_shl( E_MIN_IVAS_FX, sub( q_hp_E, Q19 ) ); + + test(); + test(); + test(); +#ifdef BASOP_NOGLOB + if ( ( EQ_16( st_fx->last_coder_type_raw, UNVOICED ) ) || /* previous frame was unvoiced */ + ( ( LT_32( ee[0], ee0_th ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy is concentrated in high frequencies provided that some energy is present in HF */ + ( LT_16( add_o( st_fx->voicing_fx[0], corr_shift, &Overflow ), voi_th ) ) ) ) /* normalized correlation is low */ +#else /* BASOP_NOGLOB */ + if ( ( EQ_16( 
st_fx->last_coder_type_raw, UNVOICED ) ) || /* previous frame was unvoiced */ + ( ( LT_32( ee[0], ee0_th ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy is concentrated in high frequencies provided that some energy is present in HF */ + ( LT_16( add( st_fx->voicing_fx[0], corr_shift ), voi_th ) ) ) ) /* normalized correlation is low */ +#endif /* BASOP_NOGLOB */ + { + tmp_offset_flag = 0; + move16(); + } + + /*-----------------------------------------------------------------* + * Decision about UC + *-----------------------------------------------------------------*/ + + /* SC-VBR - set additional parameters and thresholds for SC-VBR */ + mean_voi3_offset = 0; + move16(); + flag_low_relE = 0; + move16(); + ee1_th = 608; /*9.5 Q6*/ + move16(); + test(); + test(); + IF( st_fx->Opt_SC_VBR || ( EQ_16( st_fx->idchan, 1 ) && EQ_16( st_fx->element_mode, IVAS_CPE_TD ) ) ) /* Allow the low energy flag for the secondary channel */ + { + ee1_th = 544; /*8.5f Q6*/ + move16(); + + /* SC-VBR - determine the threshold on relative energy as a function of lp_noise */ + IF( NE_16( st_fx->input_bwidth, NB ) ) + { + /*relE_thres = 0.700f * st->lp_noise - 33.5f; (lp_noise in Q8, constant Q8<<16) */ + L_tmp = L_mac( -562036736, 22938, st_fx->lp_noise_fx ); + if ( Last_Resort == 0 ) + { + /*relE_thres = 0.650f * st->lp_noise - 33.5f; (lp_noise in Q8, constant Q8<<16)*/ + L_tmp = L_mac( -562036736, 21299, st_fx->lp_noise_fx ); + } + relE_thres = round_fx( L_tmp ); + } + ELSE + { + + /*relE_thres = 0.60f * st->lp_noise - 28.2f; (lp_noise in Q8, constant Q8<<16)*/ + L_tmp = L_mac( -473117491, 19661, st_fx->lp_noise_fx ); + relE_thres = round_fx( L_tmp ); + } + relE_thres = s_max( relE_thres, -6400 ); /* Q8 */ + + /* SC-VBR = set flag on low relative energy */ + if ( LT_16( relE, relE_thres ) ) + { + flag_low_relE = 1; + move16(); + } + + /* SC-VBR - correction of voicing threshold for NB inputs (important only in noisy conditions) */ + test(); + if ( EQ_16( st_fx->input_bwidth, NB ) && 
LT_16( vadnoise, 20 << 8 ) ) /* vadnoise in Q8, constant Q0<<8 */ + { + mean_voi3_offset = 1638; /*0.05f Q15*/ + move16(); + } + } + + /* make decision whether frame is unvoiced */ + coder_type = GENERIC; + move16(); + IF( EQ_16( st_fx->input_bwidth, NB ) ) + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); +#ifdef BASOP_NOGLOB + if ( ( ( LT_16( add_o( mean_voi3, corr_shift, &Overflow ), add( 22282, mean_voi3_offset ) ) ) && /* normalized correlation low */ + ( LT_16( add_o( st_fx->voicing_fx[2], corr_shift, &Overflow ), 25887 ) ) && /* normalized correlation low on look-ahead - onset detection */ + ( LT_32( ee[0], 640 ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy concentrated in high frequencies provided that some energy is present in HF... */ + ( LT_32( ee[1], ee1_th ) ) && ( GT_32( hp_E[1], E_min_th ) ) && /* ... biased towards look-ahead to detect onsets */ + ( tmp_offset_flag == 0 ) && /* Take care of voiced offsets */ + /*( st_fx->music_hysteresis_fx == 0 ) &&*/ /* ... and in segment after AUDIO frames */ + ( LE_32( dE1, 237568 ) ) && /* Avoid on sharp energy spikes */ + ( LE_32( st_fx->old_dE1_fx, 237568 ) ) && /* + one frame hysteresis */ + ( st_fx->spike_hyst < 0 ) ) || /* Avoid after sharp energy spikes followed by decay (e.g. castanets) */ + flag_low_relE ) /* low relative frame energy (only for SC-VBR) */ +#else + if ( ( ( LT_16( add( mean_voi3, corr_shift ), add( 22282, mean_voi3_offset ) ) ) && /* normalized correlation low */ + ( LT_16( add( st_fx->voicing_fx[2], corr_shift ), 25887 ) ) && /* normalized correlation low on look-ahead - onset detection */ + ( LT_32( ee[0], 640 ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy concentrated in high frequencies provided that some energy is present in HF... */ + ( LT_32( ee[1], ee1_th ) ) && ( GT_32( hp_E[1], E_min_th ) ) && /* ... 
biased towards look-ahead to detect onsets */ + ( tmp_offset_flag == 0 ) && /* Take care of voiced offsets */ + /*( st_fx->music_hysteresis_fx == 0 ) &&*/ /* ... and in segment after AUDIO frames */ + ( LE_32( dE1, 237568 ) ) && /* Avoid on sharp energy spikes */ + ( LE_32( st_fx->old_dE1_fx, 237568 ) ) && /* + one frame hysteresis */ + ( st_fx->spike_hyst < 0 ) ) || /* Avoid after sharp energy spikes followed by decay (e.g. castanets) */ + flag_low_relE ) /* low relative frame energy (only for SC-VBR) */ +#endif + { + coder_type = UNVOICED; + move16(); + } + } + ELSE + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); +#ifdef BASOP_NOGLOB + if ( ( ( LT_16( add_o( mean_voi3, corr_shift, &Overflow ), add( 22774, mean_voi3_offset ) ) ) && /* normalized correlation low */ + ( LT_16( add_sat( st_fx->voicing_fx[2], corr_shift ), 25887 ) ) && /* normalized correlation low on look-ahead - onset detection */ + ( LT_32( ee[0], 397 ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy concentrated in high frequencies provided that some energy is present in HF... */ + ( LT_32( ee[1], 397 ) ) && ( GT_32( hp_E[1], E_min_th ) ) && /* ... biased towards look-ahead to detect onsets */ + ( tmp_offset_flag == 0 ) && /* Take care of voiced offsets */ + /*( st_fx->music_hysteresis_fx == 0 ) && */ /* ... and in segment after AUDIO frames */ + ( LE_32( dE1, 245760 ) ) && /* Avoid on sharp energy spikes */ + ( LE_32( st_fx->old_dE1_fx, 245760 ) ) && /* + one frame hysteresis */ + ( st_fx->spike_hyst < 0 ) ) /* Avoid after sharp energy spikes followed by decay (e.g. 
castanets) */ + || ( flag_low_relE && ( LE_32( st_fx->old_dE1_fx, 245760 ) ) ) ) /* low relative frame energy (only for SC-VBR) */ +#else + if ( ( ( LT_16( add( mean_voi3, corr_shift ), add( 22774, mean_voi3_offset ) ) ) && /* normalized correlation low */ + ( LT_16( add( st_fx->voicing_fx[2], corr_shift ), 25887 ) ) && /* normalized correlation low on look-ahead - onset detection */ + ( LT_32( ee[0], 397 ) ) && ( GT_32( hp_E[0], E_min_th ) ) && /* energy concentrated in high frequencies provided that some energy is present in HF... */ + ( LT_32( ee[1], 397 ) ) && ( GT_32( hp_E[1], E_min_th ) ) && /* ... biased towards look-ahead to detect onsets */ + ( tmp_offset_flag == 0 ) && /* Take care of voiced offsets */ + /*( st_fx->music_hysteresis_fx == 0 ) && */ /* ... and in segment after AUDIO frames */ + ( LE_32( dE1, 245760 ) ) && /* Avoid on sharp energy spikes */ + ( LE_32( st_fx->old_dE1_fx, 245760 ) ) && /* + one frame hysteresis */ + ( st_fx->spike_hyst < 0 ) ) /* Avoid after sharp energy spikes followed by decay (e.g. castanets) */ + || ( flag_low_relE && ( LE_32( st_fx->old_dE1_fx, 245760 ) ) ) ) /* low relative frame energy (only for SC-VBR) */ +#endif + { + coder_type = UNVOICED; + move16(); + } + } + + /*-----------------------------------------------------------------* + * Decision about VC + *-----------------------------------------------------------------*/ + if ( st_fx->Opt_SC_VBR ) + { + hSC_VBR->set_ppp_generic = 0; + } + move16(); + + test(); + test(); + IF( EQ_16( st_fx->localVAD, 1 ) && EQ_16( coder_type, GENERIC ) && NE_16( last_core_orig, AMR_WB_CORE ) ) + { + dpit1 = abs_s( sub( T_op_fr[1], T_op_fr[0] ) ); + dpit2 = abs_s( sub( T_op_fr[2], T_op_fr[1] ) ); + dpit3 = abs_s( sub( T_op_fr[3], T_op_fr[2] ) ); + + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + IF( ( GT_16( voicing_fr[0], 19825 ) ) && /* normalized correlation high in 1st sf. 
*/ + ( GT_16( voicing_fr[1], 19825 ) ) && /* normalized correlation high in 2st sf. */ + ( GT_16( voicing_fr[2], 19825 ) ) && /* normalized correlation high in 3st sf. */ + ( GT_16( voicing_fr[3], 19825 ) ) && /* normalized correlation high in 4st sf. */ + ( GT_32( mean_ee, 256 ) ) && /* energy concentrated in low frequencies */ + ( LT_16( dpit1, 3 << 6 ) ) && + ( LT_16( dpit2, 3 << 6 ) ) && + ( LT_16( dpit3, 3 << 6 ) ) ) + { + coder_type = VOICED; + move16(); + } + ELSE IF( st_fx->Opt_SC_VBR && EQ_16( st_fx->input_bwidth, NB ) && LT_16( vadnoise, 20 << 8 ) ) + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + IF( GT_16( voicing_fr[0], 8192 ) && /* normalized correlation high in 1st sf. */ + ( GT_16( voicing_fr[1], 8192 ) ) && /* normalized correlation high in 2st sf. */ + ( GT_16( voicing_fr[2], 8192 ) ) && /* normalized correlation high in 3st sf. */ + ( GT_16( voicing_fr[3], 8192 ) ) && /* normalized correlation high in 4st sf. */ + ( GT_32( mean_ee, 64 ) ) && /* energy concentrated in low frequencies */ + ( LT_16( dpit1, 5 << 6 ) ) && + ( LT_16( dpit2, 5 << 6 ) ) && + ( LT_16( dpit3, 5 << 6 ) ) ) + { + hSC_VBR->set_ppp_generic = 1; + move16(); + coder_type = VOICED; + move16(); + } + } + + /* set VOICED mode for frames with very stable pitch and high correlation + and avoid to switch to AUDIO/MUSIC later */ + voicing_m = mac_r( L_mac( L_mac( L_mult( voicing_fr[3], 8192 ), voicing_fr[2], 8192 ), voicing_fr[1], 8192 ), voicing_fr[0], 8192 ); + test(); + test(); + test(); + test(); + test(); + IF( *flag_spitch || ( LE_16( dpit1, 3 << 6 ) && LE_16( dpit2, 3 << 6 ) && LE_16( dpit3, 3 << 6 ) && + GT_16( voicing_m, 31130 ) && GT_16( st_fx->voicing_sm_fx, 31785 ) ) ) + { + coder_type = VOICED; + move16(); + *flag_spitch = 1; + move16(); /*to avoid switch to AUDIO/MUSIC later*/ + } + } + + /*-----------------------------------------------------------------* + * Channel-aware mode - set RF mode and total bitrate + 
*-----------------------------------------------------------------*/ + + st_fx->rf_mode = st_fx->Opt_RF_ON; + move16(); + + IF( EQ_16( coder_type, GENERIC ) ) + { + test(); + test(); + test(); + test(); + IF( ( LT_16( voicing_fr[0], 6554 ) ) && /* normalized correlation high in 2st sf. */ + ( LT_16( voicing_fr[1], 6554 ) ) && /* normalized correlation high in 2st sf. */ + ( LT_16( voicing_fr[2], 6554 ) ) && /* normalized correlation high in 3rd sf. */ + ( LT_16( voicing_fr[3], 6554 ) ) && /* normalized correlation high in 4th sf. */ + ( GT_16( vadnoise, 25 << 8 ) ) ) /* when speech is clean */ + + { + st_fx->rf_mode = 0; + move16(); + /* Current frame cannot be compressed to pack the partial redundancy;*/ + + IF( NE_16( st_fx->rf_mode, st_fx->Opt_RF_ON ) ) + { + core_coder_mode_switch_ivas_fx( st_fx, st_fx->last_total_brate, 0 ); + } + } + } + + /*-----------------------------------------------------------------* + * UNCLR classifier + *-----------------------------------------------------------------*/ + + IF( hStereoClassif != NULL ) + { + test(); + test(); + test(); + test(); + test(); + IF( st_fx->element_mode > EVS_MONO && ( EQ_16( coder_type, GENERIC ) || EQ_16( coder_type, UNVOICED ) || coder_type == INACTIVE || st_fx->localVAD == 0 ) && LT_16( hStereoClassif->unclr_sw_enable_cnt[st_fx->idchan], MAX_UV_CNT ) ) + { + hStereoClassif->unclr_sw_enable_cnt[st_fx->idchan] = add( hStereoClassif->unclr_sw_enable_cnt[st_fx->idchan], 1 ); + move16(); + } + ELSE + { + hStereoClassif->unclr_sw_enable_cnt[st_fx->idchan] = 0; + move16(); + } + } + + /*-----------------------------------------------------------------* + * Updates + *-----------------------------------------------------------------*/ + + /* update spike hysteresis parameters */ + test(); + if ( st_fx->spike_hyst >= 0 && LT_16( st_fx->spike_hyst, 2 ) ) + { + st_fx->spike_hyst = add( st_fx->spike_hyst, 1 ); + move16(); + } + + /* reset spike hysteresis */ + test(); + test(); + test(); + if ( ( GT_16( 
st_fx->spike_hyst, 1 ) ) && + ( GT_16( dE3, 5 << 8 ) || /* energy increases */ +#ifdef BASOP_NOGLOB + ( GT_16( relE, -3328 ) && ( GT_16( add_sat( mean_voi3, corr_shift ), 22774 ) ) ) ) ) /* normalized correlation is high */ +#else + ( GT_16( relE, -3328 ) && ( GT_16( add( mean_voi3, corr_shift ), 22774 ) ) ) ) ) /* normalized correlation is high */ +#endif + { + st_fx->spike_hyst = -1; + move16(); + } + + /* update tilt parameters */ + st_fx->ee_old_fx = ee[1]; + move32(); /*Q6*/ + st_fx->old_dE1_fx = dE1; + move32(); /*Q13*/ + + /* save the raw coder_type for various modules later in the codec (the reason is that e.g. UNVOICED is lost at higher rates) */ + st_fx->coder_type_raw = coder_type; + move16(); + + return coder_type; +} +#endif // IVAS_FLOAT_FIXED /*-------------------------------------------------------------------* * find_uv() * diff --git a/lib_enc/find_wsp.c b/lib_enc/find_wsp.c index 97df5db897a0ec12625d376e68a48b5cd4fb4554..9e1db65a54da1e6127fe1f937b3de0f6426b3730 100644 --- a/lib_enc/find_wsp.c +++ b/lib_enc/find_wsp.c @@ -93,17 +93,24 @@ void find_wsp( return; } void ivas_find_wsp( - const Word16 L_frame, /* i : length of the frame */ - const Word16 L_subfr, /* i : length of subframe */ - const Word16 nb_subfr, /* i : number of subframes */ - const Word16 *A_fx, /* i : A(z) filter coefficients */ - Word16 *Aw_fx, /* o : weighted A(z) filter coefficients */ - const Word16 *speech_fx, /* i : pointer to the denoised speech frame */ - const Word16 tilt_fact, /* i : tilt factor */ - Word16 *wsp_fx, /* o : poitnter to the weighted speech frame */ - Word16 *mem_wsp_fx, /* i/o: W(Z) denominator memory */ - const Word16 gamma, /* i : weighting factor */ - const Word16 L_look /* i : look-ahead */ + const Word16 L_frame, /* i : length of the frame */ + const Word16 L_subfr, /* i : length of subframe */ + const Word16 nb_subfr, /* i : number of subframes */ + const Word16 *A_fx, + /* i : A(z) filter coefficients */ // Q12 + Word16 *Aw_fx, + /* o : weighted 
A(z) filter coefficients */ // Q12 + const Word16 *speech_fx, + /* i : pointer to the denoised speech frame */ // Q_new + const Word16 tilt_fact, + /* i : tilt factor */ // Q15 + Word16 *wsp_fx, + /* o : poitnter to the weighted speech frame */ // Q_new + Word16 *mem_wsp_fx, + /* i/o: W(Z) denominator memory */ // Q_new + const Word16 gamma, + /* i : weighting factor */ // Q15 + const Word16 L_look /* i : look-ahead */ ) { Word16 *p_Aw_fx, tmp_fx; diff --git a/lib_enc/gain_enc_fx.c b/lib_enc/gain_enc_fx.c index a0398c50234d3bc3f2da578bda2199b8a4b9b764..2e53060b5f5dc16dd086f0aa3ee16b23ea1e92ce 100644 --- a/lib_enc/gain_enc_fx.c +++ b/lib_enc/gain_enc_fx.c @@ -540,6 +540,337 @@ void gain_enc_mless_fx( return; } +void gain_enc_mless_ivas_fx( + BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ + const Word16 gains_mode[], /* i : gain bits */ + const Word16 element_mode, /* i : element mode */ + const Word16 L_frame, /* i : length of the frame */ + const Word16 i_subfr, /* i : subframe index */ + const Word16 tc_subfr, /* i : TC subframe index */ + const Word16 *xn, /* i : target vector */ + const Word16 *y1, /* i : zero-memory filtered adaptive excitation */ + const Word16 Q_xn, /* i : xn and y1 scaling */ + const Word16 *y2, /* i : zero-memory filtered algebraic codebook excitation */ + const Word16 *code, /* i : algebraic excitation */ + const Word16 Es_pred, /* i : predicted scaled innovation energy */ + Word16 *gain_pit, /* o : quantized pitch gain */ + Word32 *gain_code, /* o : quantized codebook gain */ + Word16 *gain_inov, /* o : gain of the innovation (used for normalization) */ + Word32 *norm_gain_code, /* o : norm. 
gain of the codebook excitation */ + Word16 *g_corr, /* i/o: correlations , -2,, -2 and 2 */ + const Word16 clip_gain /* i : gain pitch clipping flag (1 = clipping) */ +) +{ + + Word16 index, size, nBits, nBits2; + Word16 gcode0, Ei, gain_code16; + const Word16 *qua_table; + Word16 coeff[5], exp_coeff[5]; + Word16 exp, exp_code, exp_inov, exp_gcode0, frac, tmp; + Word32 L_tmp, L_tmp1, L_tmp2; + Word16 tmp1, expg; + Word16 exp1, exp2; + Word16 exp_num, exp_den, exp_div, frac_den; + Word32 L_frac_num, L_frac_den, L_div; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; +#endif + + /*-----------------------------------------------------------------* + * calculate the rest of the correlation coefficients + * c2 = , c3 = -2, c4 = 2 + *-----------------------------------------------------------------*/ + + coeff[0] = g_corr[0]; + move16(); + exp_coeff[0] = g_corr[1]; + move16(); + coeff[1] = negate( g_corr[2] ); + move16(); /* coeff[1] = -2 xn yy1 */ + exp_coeff[1] = add( g_corr[3], 1 ); + move16(); + + /* Compute scalar product */ + coeff[2] = extract_h( Dot_product12( y2, y2, L_SUBFR, &exp ) ); + exp_coeff[2] = add( sub( exp, 18 ), shl( Q_xn, 1 ) ); + move16(); /* -18 (y2 Q9) */ + + /* Compute scalar product -2* */ + coeff[3] = extract_h( L_negate( Dot_product12( xn, y2, L_SUBFR, &exp ) ) ); + exp_coeff[3] = add( sub( exp, 9 - 1 ), Q_xn ); + move16(); /* -9 (y2 Q9), +1 (2 xn y2) */ + + /* Compute scalar product 2* */ + coeff[4] = extract_h( Dot_product12( y1, y2, L_SUBFR, &exp ) ); + exp_coeff[4] = add( sub( exp, 9 - 1 ), Q_xn ); + move16(); /* -9 (y2 Q9), +1 (2 y1 y2) */ + + /*-----------------------------------------------------------------* + * calculate the unscaled innovation energy + * calculate the predicted gain code + *-----------------------------------------------------------------*/ + + /* gain_inov = 1.0f / sqrt((dot_product(code, code, L_SUBFR) + 0.01) / L_SUBFR) */ + L_tmp = Dot_product12( code, code, L_SUBFR, &exp_code ); + exp_inov = sub( 
exp_code, 18 + 6 ); + exp_code = sub( exp_code, 30 ); + + /*Ei = 10 * log10((dot_product(code, code, L_SUBFR) + 0.01) / L_SUBFR) */ + + /*----------------------------------------------------------------* + * calculate the predicted gain code + *----------------------------------------------------------------*/ + tmp = norm_l( L_tmp ); + frac = Log2_norm_lc( L_shl( L_tmp, tmp ) ); + tmp = add( 30 - 18 - 6 - 1, sub( exp_code, tmp ) ); /* exp: -18 (code in Q9), -6 (/L_SUBFR) */ + L_tmp1 = Mpy_32_16( tmp, frac, 12330 ); /* Q13 */ + Ei = round_fx( L_shl( L_tmp1, 11 ) ); /* Q8 */ + + /* predicted codebook gain */ + gcode0 = sub( Es_pred, Ei ); /* Q8 */ + + /*---------------------------------------------------------------* + * Decode codebook gain and the adaptive excitation low-pass + * filtering factor (Finalize computation ) + *---------------------------------------------------------------*/ + /* gain_inov = 1.0f / sqrt((dot_product(code, code, L_SUBFR) + 0.01) / L_SUBFR) */ + L_tmp = Isqrt_lc( L_tmp, &exp_inov ); + *gain_inov = extract_h( L_shl( L_tmp, sub( exp_inov, 3 ) ) ); /* gain_inov in Q12 */ + + /* gcode0 = pow(10, 0.05 * (Es_pred - Ei)) */ + /*----------------------------------------------------------------* + * gcode0 = pow(10.0, gcode0/20) + * = pow(2, 3.321928*gcode0/20) + * = pow(2, 0.166096*gcode0) + *----------------------------------------------------------------*/ + + L_tmp = L_mult( gcode0, 21771 ); /* *0.166096 in Q17 -> Q26 */ + L_tmp = L_shr( L_tmp, 10 ); /* From Q26 to Q16 */ + frac = L_Extract_lc( L_tmp, &exp_gcode0 ); /* Extract exponent of gcode0 */ + + gcode0 = extract_l( Pow2( 14, frac ) ); /* Put 14 as exponent so that */ + /* output of Pow2() will be: */ + /* 16384 < Pow2() <= 32767 */ + exp_gcode0 = sub( exp_gcode0, 14 ); + + /*-----------------------------------------------------------------* + * select the codebook, size and number of bits + * set the gains searching range + 
*-----------------------------------------------------------------*/ + nBits = gains_mode[shr( i_subfr, 6 )]; + move16(); + + test(); + test(); + test(); + test(); + test(); + IF( ( EQ_16( tc_subfr, 3 * L_SUBFR ) && EQ_16( i_subfr, 3 * L_SUBFR ) && EQ_16( L_frame, L_FRAME ) ) || + ( EQ_16( tc_subfr, 4 * L_SUBFR ) && EQ_16( i_subfr, 4 * L_SUBFR ) && EQ_16( L_frame, L_FRAME16k ) ) ) + { + /* *gain_pit = (g_corr[2]*tmp2) - (0.5f*g_corr[4]*tmp3); + = ((-0.5f*g_corr[1]*g_corr[2]) - (-0.25*g_corr[3]*g_corr[4]))/tmp1; + = ((0.25*g_corr[3]*g_corr[4]) - (0.5*g_corr[1]*g_corr[2]))/tmp1; */ + + /* *gain_code = (g_corr[0]*tmp3) - (0.5f*g_corr[4]*tmp2); + = ((-0.5*g_corr[3]*g_corr[0]) - (-0.25*g_corr[1]*g_corr[4]))/tmp1; + = ((0.25*g_corr[1]*g_corr[4]) - (0.5*g_corr[0]*g_corr[3]))/tmp1; */ + + L_tmp1 = L_mult( coeff[0], coeff[2] ); /*Q31*/ + exp1 = add( exp_coeff[0], exp_coeff[2] ); + + L_tmp2 = L_shr( L_mult( coeff[4], coeff[4] ), 2 ); /*Q31*/ + exp2 = add( exp_coeff[4], exp_coeff[4] ); + + IF( GT_16( exp1, exp2 ) ) + { + L_tmp2 = L_shr( L_tmp2, sub( exp1, exp2 ) ); /*Q31*/ + exp_den = exp1; + move16(); + } + ELSE + { + L_tmp1 = L_shr( L_tmp1, sub( exp2, exp1 ) ); /*Q31*/ + exp_den = exp2; + move16(); + } + L_frac_den = L_sub( L_tmp1, L_tmp2 ); /*Q31*/ + + frac_den = extract_h( L_frac_den ); + frac_den = s_max( frac_den, 1 ); + L_frac_den = L_max( L_frac_den, 1 ); + exp = norm_l( L_frac_den ); + tmp = div_s( shl( 1, sub( 14, exp ) ), frac_den ); /*Q(14-exp)*/ + + L_tmp1 = L_shr( L_mult( coeff[3], coeff[4] ), 2 ); /*Q31*/ + exp1 = add( exp_coeff[3], exp_coeff[4] ); + + L_tmp2 = L_shr( L_mult( coeff[1], coeff[2] ), 1 ); /*Q31*/ + exp2 = add( exp_coeff[1], exp_coeff[2] ); + + IF( GT_16( exp1, exp2 ) ) + { + L_tmp2 = L_shr( L_tmp2, sub( exp1, exp2 ) ); /*Q31*/ + exp_num = exp1; + move16(); + } + ELSE + { + L_tmp1 = L_shr( L_tmp1, sub( exp2, exp1 ) ); /*Q31*/ + exp_num = exp2; + move16(); + } + L_frac_num = L_sub( L_tmp1, L_tmp2 ); /*Q31*/ + + L_div = Mult_32_16( L_frac_num, tmp ); 
/*Q(30-exp)*/ + exp_div = sub( exp_num, exp_den ); + +#ifdef BASOP_NOGLOB + *gain_pit = round_fx_o( L_shl_o( L_div, add( exp, exp_div ), &Overflow ), &Overflow ); /*Q14*/ +#else + *gain_pit = round_fx( L_shl( L_div, add( exp, exp_div ) ) ); /*Q14*/ +#endif + + L_tmp1 = L_shr( L_mult( coeff[1], coeff[4] ), 2 ); /*Q31*/ + exp1 = add( exp_coeff[1], exp_coeff[4] ); + + L_tmp2 = L_shr( L_mult( coeff[0], coeff[3] ), 1 ); /*Q31*/ + exp2 = add( exp_coeff[0], exp_coeff[3] ); + + IF( GT_16( exp1, exp2 ) ) + { + L_tmp2 = L_shr( L_tmp2, sub( exp1, exp2 ) ); /*Q31*/ + exp_num = exp1; + } + ELSE + { + L_tmp1 = L_shr( L_tmp1, sub( exp2, exp1 ) ); /*Q31*/ + exp_num = exp2; + } + L_frac_num = L_sub( L_tmp1, L_tmp2 ); /*Q31*/ + + L_div = Mult_32_16( L_frac_num, tmp ); /*Q(30-exp)*/ + exp_div = sub( exp_num, exp_den ); + +#ifdef BASOP_NOGLOB + *gain_code = L_shl_o( L_div, sub( add( exp, exp_div ), 14 ), &Overflow ); +#else + *gain_code = L_shl( L_div, sub( add( exp, exp_div ), 14 ) ); +#endif + move32(); /*Q16*/ + + *gain_pit = s_max( G_PITCH_MIN_TC192_Q14, s_min( *gain_pit, G_PITCH_MAX_TC192_Q14 ) ); + + /* set number of bits for two SQs */ + nBits2 = shr( add( nBits, 1 ), 1 ); + nBits = shr( nBits, 1 ); + + /* gain_pit Q */ + + tmp1 = mult_r( G_PITCH_MAX_MINUS_MIN_TC192_Q13, div_s( 1, sub( shl( 1, nBits ), 1 ) ) ); /*Q13*/ /* set quantization step */ + index = usquant_fx( *gain_pit, gain_pit, G_PITCH_MIN_TC192_Q14, tmp1, shl( 1, nBits ) ); + move16(); + push_indice( hBstr, IND_GAIN_PIT, index, nBits ); + + /* gain_code Q */ + /**gain_code /= gcode0;*/ + IF( gcode0 != 0 ) + { + tmp = div_s( 16384, gcode0 ); /*Q15*/ + L_tmp = Mult_32_16( *gain_code, tmp ); /*Q16*/ + *gain_code = L_shr( L_tmp, add( 14, exp_gcode0 ) ); /*Q16*/ + } + + index = gain_quant_fx( gain_code, &gain_code16, LG10_G_CODE_MIN_TC192_Q14, LG10_G_CODE_MAX_TC192_Q13, nBits2, &expg ); + push_indice( hBstr, IND_GAIN_CODE, index, nBits2 ); + L_tmp = L_mult( gain_code16, gcode0 ); /*Q0*Q0 -> Q1*/ +#ifdef BASOP_NOGLOB + 
*gain_code = L_shl_o( L_tmp, add( add( expg, exp_gcode0 ), 15 ), &Overflow ); /*Q16*/ +#else + *gain_code = L_shl( L_tmp, add( add( expg, exp_gcode0 ), 15 ) ); /*Q16*/ +#endif + } + ELSE + { + size = shl( 1, nBits ); + + SWITCH( nBits ) + { + case 7: + { + qua_table = gain_qua_mless_7b_fx; + move16(); + if ( EQ_16( clip_gain, 1 ) ) + { + size = sub( size, 30 ); + } + BREAK; + } + case 6: + { + qua_table = gain_qua_mless_6b_fx; + if ( GT_16( element_mode, EVS_MONO ) ) + { +#ifdef IVAS_CODE + qua_table = gain_qua_mless_6b_stereo; +#else + // PMTE() +#endif + } + move16(); + if ( EQ_16( clip_gain, 1 ) ) + { + size = sub( size, 14 ); + } + BREAK; + } + case 5: + { + qua_table = gain_qua_mless_5b_fx; + move16(); + if ( EQ_16( clip_gain, 1 ) ) + { + size = sub( size, 6 ); + } + BREAK; + } + default: + { + qua_table = gain_qua_mless_6b_fx; + move16(); + if ( EQ_16( clip_gain, 1 ) ) + { + size = sub( size, 14 ); + } + BREAK; + } + } + + /* in case of AVQ inactive, limit the gain_pit to 0.65 */ + test(); + IF( EQ_16( clip_gain, 2 ) && EQ_16( nBits, 6 ) ) + { + size = sub( size, 36 ); + nBits = sub( nBits, 1 ); + } + + /*-----------------------------------------------------------------* + * search for the best quantizer + *-----------------------------------------------------------------*/ + index = Find_Opt_gainQ_fx( coeff, exp_coeff, gain_pit, gain_code, gcode0, exp_gcode0, qua_table, size ); + push_indice( hBstr, IND_GAIN, index, nBits ); + } + + /* *norm_gain_code = *gain_code / *gain_inov; */ + exp = sub( norm_s( *gain_inov ), 1 ); + exp = s_max( exp, 0 ); + + tmp = div_s( shr( 8192, exp ), *gain_inov ); + *norm_gain_code = L_shr( Mult_32_16( *gain_code, tmp ), sub( 1, exp ) ); + move32(); + + return; +} /*---------------------------------------------------------------------* * gain_enc_SQ() diff --git a/lib_enc/init_enc.c b/lib_enc/init_enc.c index 2bad8439083441e0a546f7defd87df9f9078ea57..9edf656240f2d920abd77aaa1d0a6767610a3334 100644 --- a/lib_enc/init_enc.c +++ 
b/lib_enc/init_enc.c @@ -157,6 +157,9 @@ ivas_error init_encoder( init_gp_clip( st->clip_var ); pitch_ol_init( &st->old_thres, &st->old_pitch, &st->delta_pit, &st->old_corr ); set_f( st->old_wsp, 0, L_WSP_MEM ); +#ifdef IVAS_FLOAT_FIXED + set16_fx( st->old_wsp_fx, 0, L_WSP_MEM ); +#endif set_f( st->old_wsp2, 0, ( L_WSP_MEM - L_INTERPOL ) / OPL_DECIM ); st->mem_preemph = 0.0f; @@ -183,6 +186,9 @@ ivas_error init_encoder( set_f( st->Bin_E, 0, L_FFT ); st->ee_old = 10.0f; +#ifdef IVAS_FLOAT_FIXED + st->ee_old_fx = 10 << 6; // 10 in Q6 +#endif st->Nb_ACELP_frames = 0; st->audio_frame_cnt = AUDIO_COUNTER_INI; /* Initialization of the audio frame counter mildly into the audio mode */ @@ -232,6 +238,9 @@ ivas_error init_encoder( /* find_uv() parameters */ st->old_dE1 = 0.0f; +#ifdef IVAS_FLOAT_FIXED + st->old_dE1_fx = 0; +#endif st->old_ind_deltaMax = 0; set_f( st->old_enr_ssf, 0.0f, 2 * NB_SSF ); st->spike_hyst = -1; diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index 1f3c9fc91aba19bd1eb97c233756d3af8431cf5a..5cb7de2f685f3b42a6e3d9dee47f9f91dee8ffbb 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -1059,8 +1059,7 @@ ivas_error ivas_core_enc( fb_tbe_enc( st, st->input, fb_exc ); #else #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 Q_fb_exc, Q_input; - Word16 fb_exc_fx[L_FRAME16k]; + Word16 Q_input; Q_fb_exc = Q_factor_arr( fb_exc, L_FRAME16k ); floatToFixed_arr( fb_exc, fb_exc_fx, Q_fb_exc, L_FRAME16k ); // Q_input is being calculated inside already diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index f3b7fce10ae5885eeb48104c9fde7cdfe68cabe6..2f827ce81f0a34761530d94c09dc6b3bf37c136b 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -934,6 +934,15 @@ ivas_error pre_proc_front_ivas_fx( f2me_buf_16( st->inp_12k8_mem_stereo_sw, st->inp_12k8_mem_stereo_sw_fx, &inp_12k8_mem_stereo_sw_e, 4 ); f2me_16( st->mem_preemph_DFT, &st->mem_preemph_DFT_fx, &mem_preemph_DFT_e 
); f2me_16( st->mem_preemph, &st->mem_preemph_fx, &mem_preemph_e ); + Word16 relE_fx; + Word16 old_wsp_fx[L_WSP]; + Word16 pitch_fr_fx[NB_SUBFR]; + Word16 voicing_fr_fx[NB_SUBFR]; + Word16 A_fx[NB_SUBFR16k * ( M + 1 )]; + Word16 epsP_h[M + 1]; /* o : LP prediction errors */ + Word16 epsP_l[M + 1]; /* o : LP prediction errors */ + Word16 cor_map_sum16_fx; /* o : speech/music clasif. parameter */ + Word32 fr_bands_LR_fx[2][2 * NB_BANDS]; #endif // IVAS_FLOAT_FIXED_TO_BE_REMOVED float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */ float *wsp; /* weighted input signal buffer */ @@ -942,29 +951,26 @@ ivas_error pre_proc_front_ivas_fx( float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */ float tmpN[NB_BANDS]; /* Temporary noise update */ float tmpE[NB_BANDS]; /* Temporary averaged energy of 2 sf. */ - Word32 tmpN_fx[NB_BANDS]; /* Temporary noise update */ - Word32 tmpE_fx[NB_BANDS]; /* Temporary averaged energy of 2 sf. */ float tmpN_LR[CPE_CHANNELS][NB_BANDS]; /* Temporary noise update */ float tmpE_LR[CPE_CHANNELS][NB_BANDS]; /* Temporary averaged energy of 2 sf. */ float cor_map_sum_LR[CPE_CHANNELS]; /* speech/music clasif. parameter */ - float non_staX_LR; /* non-stationarity for sp/mus classifier */ - float ncharX_LR; /* noise character for sp/mus classifier */ - float sp_div_LR; /* spectral diversity feature */ - float S_map_LR[L_FFT / 2]; /* short-term correlation map */ - float corr_shiftL; /* correlation shift */ - float corr_shiftR; /* correlation shift */ - Word16 loc_harmLR[CPE_CHANNELS]; /* harmonicity flag */ - Word16 lr_vad_enabled; /* LR VAD indicator */ - float ee[2]; /* Spectral tilt */ - float corr_shift; /* correlation shift */ - float sp_div, PS[128]; /* speech/music clasif. 
parameters */ - Word32 sp_div_fx; - Word16 L_look; /* length of look-ahead */ + // float non_staX_LR; /* non-stationarity for sp/mus classifier */ + // float ncharX_LR; /* noise character for sp/mus classifier */ + // float sp_div_LR; /* spectral diversity feature */ + float S_map_LR[L_FFT / 2]; /* short-term correlation map */ + float corr_shiftL; /* correlation shift */ + float corr_shiftR; /* correlation shift */ + // Word16 loc_harmLR[CPE_CHANNELS]; /* harmonicity flag */ + Word16 lr_vad_enabled; /* LR VAD indicator */ + float ee[2]; /* Spectral tilt */ + float corr_shift; /* correlation shift */ + float sp_div, PS[128]; /* speech/music clasif. parameters */ + Word16 L_look; /* length of look-ahead */ #if 0 float mem_decim_dummy[2 * L_FILT_MAX]; /* dummy decimation filter memory */ float temp1F_icatdmResampBuf[L_FILT_MAX]; /* temp buffers for ICA TDM resamplers */ #endif - float hp_E[2]; /* Energy in HF */ + float hp_E[2]; /* Energy in HF */ Word16 flag_spitch; Word16 high_lpn_flag; float lsf_new[M]; @@ -980,14 +986,15 @@ ivas_error pre_proc_front_ivas_fx( Word16 alw_pitch_lag_12k8[2]; float alw_voicing[2]; Word16 last_core_orig; - float dummy; + // float dummy; float S_map[L_FFT / 2]; - Word16 S_map_fx[L_FFT / 2]; + // Word16 S_map_fx[L_FFT / 2]; Word16 i, lMemRecalc, lMemRecalc_12k8; Word16 smc_dec; - float ncharX, dE1X; - // Encoder_State *st; - // float *signal_in; + // float dE1X; + // float ncharX; + // Encoder_State *st; + // float *signal_in; Word16 element_mode; Word32 input_Fs, last_element_brate; Word16 *tdm_SM_last_clas, tmpS; @@ -1018,7 +1025,6 @@ ivas_error pre_proc_front_ivas_fx( Word16 *temp1F_icatdmResampBuf_fx; Word16 *old_inp_16k_fx; Word16 *mem_decim_dummy_fx; /* dummy decimation filter memory */ - Word32 Etot_fx; /* total energy */ #if 1 Word16 Etot_16fx; /* total energy, Q8 */ #endif @@ -1026,31 +1032,54 @@ ivas_error pre_proc_front_ivas_fx( #ifndef REMOVE_IVAS_UNUSED_PARAMETERS_WARNING Word32 *res_cod_SNR_M_fx; #endif - Word16 
corr_shiftL_fx; - Word16 corr_shiftR_fx; Word16 snr_sum_he_fx; /* HE SAD parameters */ - Word16 old_wsp_fx[L_WSP]; - Word32 fr_bands_fx[2 * NB_BANDS]; /* energy in frequency bands */ Word16 new_inp_out_size; Word16 Q_new_inp; Word16 mem_decim_size; - Word16 *wsp_fx; - Word16 pitch_fr_fx[NB_SUBFR]; - Word16 voicing_fr_fx[NB_SUBFR]; + // Word16 *wsp_fx; + // Word16 pitch_fr_fx[NB_SUBFR]; + // Word16 voicing_fr_fx[NB_SUBFR]; #ifndef REMOVE_IVAS_UNUSED_PARAMETERS_WARNING Word16 Q_new; Word16 corr_shift_fx; #endif - Word16 dummy_fx; - Word16 Etot_LR_fx[2]; #if 0 Word16 Q_exp; Word32 Le_min_scaled; Word16 relE_fx; #endif - Word32 fr_bands_LR_fx[2][2 * NB_BANDS]; + + Word16 dummy_fx; + Word16 ncharX_fx; + Word16 ncharX_LR_fx; /* noise character for sp/mus classifier */ + Word16 loc_harmLR_fx[CPE_CHANNELS]; /* harmonicity flag */ + Word16 non_staX16_fx; + Word16 non_staX_LR_fx; /* non-stationarity for sp/mus classifier */ + Word32 sp_div_fx; + Word16 sp_div16_fx; + Word16 q_sp_div; + Word16 sp_div_LR_fx; + Word16 q_sp_div_LR; + Word16 *wsp_fx; + Word32 dE1X_fx; + Word16 Etot_LR_fx[2]; + Word16 S_map_fx[L_FFT / 2]; + Word16 cor_map_sum_LR_fx[CPE_CHANNELS]; /* speech/music clasif. parameter */ + Word16 S_map_LR_fx[L_FFT / 2]; /* short-term correlation map */ + Word32 Etot_fx; /* total energy */ + Word32 tmpN_fx[NB_BANDS]; /* Temporary noise update */ + Word32 tmpE_fx[NB_BANDS]; /* Temporary averaged energy of 2 sf. */ Word32 tmpN_LR_fx[CPE_CHANNELS][NB_BANDS]; /* Temporary noise update */ Word32 tmpE_LR_fx[CPE_CHANNELS][NB_BANDS]; /* Temporary averaged energy of 2 sf. 
*/ + Word16 corr_shiftL_fx; /* correlation shift */ + Word16 corr_shiftR_fx; /* correlation shift */ + Word16 corr_shift_fx; /* correlation shift */ + Word32 ee_fx[2]; /* Spectral tilt */ + Word32 fr_bands_fx[2 * NB_BANDS]; /* energy in frequency bands */ + Word32 lf_E_fx[2 * VOIC_BINS]; /* per bin spectrum energy in lf */ + Word32 lf_E_LR_fx[2][2 * VOIC_BINS]; /* per bin spectrum energy in lf */ + Word32 hp_E_fx[2]; /* Energy in HF */ + enerBuffer_exp = 0; move16(); enerBuffer_fx = (Word32 *) malloc( 60 * sizeof( Word32 * ) ); @@ -1233,7 +1262,7 @@ ivas_error pre_proc_front_ivas_fx( new_inp_12k8 = old_inp_12k8 + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */ inp_12k8 = new_inp_12k8 - L_look; /* pointer to the current frame of input signal in 12.8kHz core */ #endif - new_inp_12k8_fx = old_inp_12k8_fx + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */ + new_inp_12k8_fx = old_inp_12k8_fx + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */ inp_12k8_fx = new_inp_12k8_fx - L_look; /* pointer to the current frame of input signal in 12.8kHz core */ #ifdef IVAS_FLOAT_FIXED_TO_BE_REMOVED @@ -1257,7 +1286,8 @@ ivas_error pre_proc_front_ivas_fx( mvr2r( st->old_wsp, old_wsp, L_WSP_MEM ); wsp = old_wsp + L_WSP_MEM; /* pointer to the current frame of weighted signal in 12.8kHz core */ #endif - + Copy( st->old_wsp_fx, old_wsp_fx, L_WSP_MEM ); + wsp_fx = old_wsp_fx + L_WSP_MEM; /* pointer to the current frame of weighted signal in 12.8kHz core */ IF( NE_16( element_mode, IVAS_CPE_DFT ) ) { @@ -1573,7 +1603,7 @@ ivas_error pre_proc_front_ivas_fx( st->hNoiseEst->sign_dyn_lp_32fx = (Word32) ( st->hNoiseEst->sign_dyn_lp * 16777216.0 ); /*float to fix conversions for wb_vad_ivas_fx*/ - Word16 Q_new = Q_factor_arr( fr_bands, 40 ); + Word16 Q_new = Q_factor_arr( fr_bands, 40 ) + 3; floatToFixed_arrL( fr_bands, fr_bands_fx, Q_new + QSCALE, 40 ); st->lp_noise_fx = float_to_fix16( st->lp_noise, 8 ); 
st->lp_speech_fx = float_to_fix16( st->lp_speech, 8 ); @@ -1816,7 +1846,6 @@ ivas_error pre_proc_front_ivas_fx( corr_shift = correlation_shift( st->hNoiseEst->totalNoise ); #else #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 corr_shift_fx; // performing float to fix conversion of (totalNoise) st->hNoiseEst->totalNoise_fx = float_to_fix16( st->hNoiseEst->totalNoise, 8 ); #endif @@ -1838,9 +1867,9 @@ ivas_error pre_proc_front_ivas_fx( st->last_totalNoise_fx = float_to_fix16( st->last_totalNoise, Q8 ); st->hNoiseEst->totalNoise_fx = float_to_fix16( st->hNoiseEst->totalNoise, Q8 ); - FOR( Word16 n = 0; n < TOTALNOISE_HIST_SIZE; n++ ) + FOR( Word16 j = 0; j < TOTALNOISE_HIST_SIZE; j++ ) { - st->totalNoise_increase_hist_fx[n] = float_to_fix16( st->totalNoise_increase_hist[n], Q8 ); + st->totalNoise_increase_hist_fx[j] = float_to_fix16( st->totalNoise_increase_hist[j], Q8 ); move16(); } #endif @@ -1996,6 +2025,7 @@ ivas_error pre_proc_front_ivas_fx( total_brate = ( element_mode == IVAS_SCE ) ? st->total_brate : st->bits_frame_nominal * FRAMES_PER_SEC; configureFdCngEnc( st->hFdCngEnc, max( st->input_bwidth, WB ), total_brate ); + configureFdCngEnc_ivas_fx( st->hFdCngEnc, max( st->input_bwidth, WB ), total_brate ); if ( hCPE != NULL ) { st->hFdCngEnc->hFdCngCom->CngBitrate = hCPE->element_brate - 1; @@ -2045,6 +2075,58 @@ ivas_error pre_proc_front_ivas_fx( * 1/4 pitch precision improvement *----------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 Aw_fx[NB_SUBFR16k * ( M + 1 )] = { 0 }; + Word16 buf[15000] = { 0 }; + Word16 *inp_12k8_fx1 = &buf[M]; // this is done because find_wsp accesses inp from -16 index + Word16 wsp_fx1[L_FRAME + L_LOOK_12k8] = { 0 }; + + for ( i = 0; i < ( NB_SUBFR16k - 1 ) * ( M + 1 ); i++ ) + { + A_fx[i] = (Word16) floatToFixed( A[i], Q12 ); + } + Word16 e1, e2, q_inp_12k8; + f2me_buf_16( inp_12k8 - M, inp_12k8_fx1, &e1, 368 + M ); + f2me_buf_16( &st->mem_wsp, 
&st->mem_wsp_fx, &e2, 1 ); + q_inp_12k8 = sub( 15, s_max( e1, e2 ) ); + for ( i = -M; i < 368; i++ ) + { /* starting index from -16 */ + inp_12k8_fx1[i] = (Word16) floatToFixed( inp_12k8[i], q_inp_12k8 ); + } + st->mem_wsp_fx = (Word16) floatToFixed( st->mem_wsp, q_inp_12k8 ); +#endif + + ivas_find_wsp( L_FRAME, L_SUBFR, NB_SUBFR, A_fx, Aw_fx, inp_12k8_fx1, TILT_FAC_FX, wsp_fx1, &st->mem_wsp_fx, GAMMA1, L_LOOK_12k8 ); + + IF( EQ_16( st->vad_flag, 0 ) ) + { + /* reset the OL pitch tracker memories during inactive frames */ + pitch_ol_init_fx( &st->old_thres_fx, &st->old_pitch, &st->delta_pit, &st->old_corr_fx ); + } + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + FOR( i = 0; i < ( NB_SUBFR16k - 1 ); i++ ) + { + FOR( int j = 0; j < M + 1; j++ ) + { + Aw[i * ( M + 1 ) + j] = (float) Aw_fx[i * ( M + 1 ) + j] / Aw_fx[i * ( M + 1 )]; + } + } + FOR( i = 0; i < L_FRAME + L_LOOK_12k8; i++ ) + { + wsp[i] = fixedToFloat( wsp_fx1[i], q_inp_12k8 ); + } + st->mem_wsp = fixedToFloat( st->mem_wsp_fx, q_inp_12k8 ); + + IF( EQ_16( st->vad_flag, 0 ) ) + { + st->old_thres = (float) ( st->old_thres_fx ); // no Q-factor used here since it's initialised to 0. + st->old_corr = (float) ( st->old_corr_fx ); // no Q-factor used here since it's initialised to 0. 
+ } +#endif + +#else find_wsp( L_FRAME, L_SUBFR, NB_SUBFR, A, Aw, inp_12k8, TILT_FAC, wsp, &st->mem_wsp, GAMMA1_FLT, L_look ); if ( st->vad_flag == 0 ) @@ -2052,6 +2134,7 @@ ivas_error pre_proc_front_ivas_fx( /* reset the OL pitch tracker memories during inactive frames */ pitch_ol_init( &st->old_thres, &st->old_pitch, &st->delta_pit, &st->old_corr ); } +#endif old_pitch1 = st->pitch[1]; @@ -2059,23 +2142,192 @@ ivas_error pre_proc_front_ivas_fx( /* Updates for adaptive lag window memory */ st->old_pitch_la = st->pitch[2]; + #ifdef IVAS_FLOAT_FIXED + #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 q_wsp = Q_factor_arr( old_wsp, L_WSP ) - 5; - floatToFixed_arr( old_wsp, old_wsp_fx, q_wsp, L_WSP ); - floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); + Word16 q_wsp = Q_factor_arr( old_wsp, L_WSP ) - 3; + floatToFixed_arr16( old_wsp, old_wsp_fx, q_wsp, L_WSP ); + floatToFixed_arr16( st->voicing, st->voicing_fx, Q15, 3 ); st->voicing_sm_fx = float_to_fix16( st->voicing_sm, Q15 ); st->voicing0_sm_fx = float_to_fix16( st->voicing0_sm, Q15 ); st->LF_EnergyRatio_sm_fx = float_to_fix16( st->LF_EnergyRatio_sm, Q7 ); - st->diff_sm_fx = floatToFixed_32( st->diff_sm, Q7 ); - st->energy_sm_fx = floatToFixed_32( st->energy_sm, Q7 ); - floatToFixed_arr( st->Bin_E, st->lgBin_E_fx, Q7, L_FFT / 2 ); // Function StableHighPitchDetect_fx excepts st->lgBin_E_fx to be in Q7 + st->diff_sm_fx = float_to_fix( st->diff_sm, Q7 ); + st->energy_sm_fx = float_to_fix( st->energy_sm, Q7 ); + + floatToFixed_arr16( st->Bin_E, st->lgBin_E_fx, Q7, L_FFT / 2 ); // Function StableHighPitchDetect_fx excepts st->lgBin_E_fx to be in Q7 + + corr_shift_fx = float_to_fix16( corr_shift, Q15 ); + corr_shiftL_fx = float_to_fix16( corr_shiftL, Q15 ); + corr_shiftR_fx = float_to_fix16( corr_shiftR, Q15 ); + + st->hVAD->running_avg_fx = float_to_fix16( st->hVAD->running_avg, Q15 ); + st->hVAD->ra_deltasum_fx = float_to_fix16( st->hVAD->ra_deltasum, Q15 ); + + if ( lr_vad_enabled && st->idchan == 0 ) + { + for ( 
int j = 0; j < 2; j++ ) + { + hCPE->hFrontVad[j]->hVAD->running_avg_fx = float_to_fix16( hCPE->hFrontVad[j]->hVAD->running_avg, Q15 ); + hCPE->hFrontVad[j]->hVAD->ra_deltasum_fx = float_to_fix16( hCPE->hFrontVad[j]->hVAD->ra_deltasum, Q15 ); + } + } + + floatToFixed_arr16( A, A_fx, Q12, NB_SUBFR16k * ( M + 1 ) ); + st->bckr_tilt_lt = float_to_fix( st->bckr_tilt_lt_flt, Q16 ); + + + Word16 q_fr_bands = Q30; + q_fr_bands = min( q_fr_bands, Q_factor_arrL( tmpN, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( tmpE, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( fr_bands, 2 * NB_BANDS ) - 4 /* needs 4 gaurd bits for summation */ ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( st->hNoiseEst->bckr, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( st->hNoiseEst->fr_bands1, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( st->hNoiseEst->fr_bands2, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( st->hNoiseEst->ave_enr, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( st->hNoiseEst->ave_enr2, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( lf_E, 2 * VOIC_BINS ) + 2 ); + + if ( lr_vad_enabled && st->idchan == 0 ) + { + for ( int j = 0; j < 2; j++ ) + { + q_fr_bands = min( q_fr_bands, Q_factor_arrL( lf_E_LR[j], 2 * VOIC_BINS ) + 2 ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( tmpN_LR[j], NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( tmpE_LR[j], NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( fr_bands_LR[j], 2 * NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( hCPE->hFrontVad[j]->hNoiseEst->bckr, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( hCPE->hFrontVad[j]->hNoiseEst->fr_bands1, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( hCPE->hFrontVad[j]->hNoiseEst->fr_bands2, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, Q_factor_arrL( hCPE->hFrontVad[j]->hNoiseEst->ave_enr, NB_BANDS ) ); + q_fr_bands = min( q_fr_bands, 
Q_factor_arrL( hCPE->hFrontVad[j]->hNoiseEst->ave_enr2, NB_BANDS ) ); + } + + for ( int j = 0; j < 2; j++ ) + { + floatToFixed_arr32( fr_bands_LR[j], fr_bands_LR_fx[j], q_fr_bands, 2 * NB_BANDS ); + floatToFixed_arr32( hCPE->hFrontVad[j]->hNoiseEst->bckr, hCPE->hFrontVad[j]->hNoiseEst->bckr_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( hCPE->hFrontVad[j]->hNoiseEst->fr_bands1, hCPE->hFrontVad[j]->hNoiseEst->fr_bands1_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( hCPE->hFrontVad[j]->hNoiseEst->fr_bands2, hCPE->hFrontVad[j]->hNoiseEst->fr_bands2_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( hCPE->hFrontVad[j]->hNoiseEst->ave_enr, hCPE->hFrontVad[j]->hNoiseEst->ave_enr_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( hCPE->hFrontVad[j]->hNoiseEst->ave_enr2, hCPE->hFrontVad[j]->hNoiseEst->ave_enr2_fx, q_fr_bands, NB_BANDS ); + for ( int k = 0; k < NB_BANDS; k++ ) + { + hCPE->hFrontVad[j]->hNoiseEst->fr_bands1_fx[k] = max( 1, hCPE->hFrontVad[j]->hNoiseEst->fr_bands1_fx[k] ); + hCPE->hFrontVad[j]->hNoiseEst->fr_bands2_fx[k] = max( 1, hCPE->hFrontVad[j]->hNoiseEst->fr_bands2_fx[k] ); + } + floatToFixed_arr32( tmpN_LR[j], tmpN_LR_fx[j], q_fr_bands, NB_BANDS ); + floatToFixed_arr32( tmpE_LR[j], tmpE_LR_fx[j], q_fr_bands, NB_BANDS ); + floatToFixed_arr32( lf_E_LR[j], lf_E_LR_fx[j], q_fr_bands - 2, 2 * VOIC_BINS ); + } + } + floatToFixed_arr32( tmpN, tmpN_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( tmpE, tmpE_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( lf_E, lf_E_fx, q_fr_bands - 2, 2 * VOIC_BINS ); + floatToFixed_arr32( st->hNoiseEst->bckr, st->hNoiseEst->bckr_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( st->hNoiseEst->fr_bands1, st->hNoiseEst->fr_bands1_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( st->hNoiseEst->fr_bands2, st->hNoiseEst->fr_bands2_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( st->hNoiseEst->ave_enr, st->hNoiseEst->ave_enr_fx, q_fr_bands, NB_BANDS ); + floatToFixed_arr32( st->hNoiseEst->ave_enr2, 
st->hNoiseEst->ave_enr2_fx, q_fr_bands, NB_BANDS ); + + for ( int k = 0; k < NB_BANDS; k++ ) + { + st->hNoiseEst->fr_bands1_fx[k] = max( 1, st->hNoiseEst->fr_bands1_fx[k] ); + st->hNoiseEst->fr_bands2_fx[k] = max( 1, st->hNoiseEst->fr_bands2_fx[k] ); + } + + floatToFixed_arr32( fr_bands, fr_bands_fx, q_fr_bands, 2 * NB_BANDS ); + + q_inp_12k8 = Q_factor_arr( old_inp_12k8, L_INP_12k8 ); // inp_12k8_fx + floatToFixed_arr16( old_inp_12k8, old_inp_12k8_fx, q_inp_12k8, L_INP_12k8 ); + + Word16 Etot16_fx = float_to_fix16( Etot, Q8 ); + relE_fx = float_to_fix16( *relE, Q8 ); + + Word16 Q_epsp = Q_factor_arrL( epsP, M + 1 ); + floatToFixed_arr32( epsP, epsP_fx, Q_epsp, M + 1 ); + FOR( Word16 j = 0; j <= M; j++ ) + { + L_Extract( epsP_fx[j], &epsP_h[j], &epsP_l[j] ); + } + + st->hSpMusClas->mean_avr_dyn_fx = float_to_fix16( st->hSpMusClas->mean_avr_dyn, Q7 ); + st->hSpMusClas->last_sw_dyn_fx = float_to_fix16( st->hSpMusClas->last_sw_dyn, Q7 ); + + floatToFixed_arr16( st->hSpMusClas->past_log_enr, st->hSpMusClas->past_log_enr_fx, Q8, NB_BANDS_SPMUS ); + + floatToFixed_arr16( st->hNoiseEst->old_S, st->hNoiseEst->old_S_fx, Q7, 128 ); + floatToFixed_arr16( st->hNoiseEst->cor_map, st->hNoiseEst->cor_map_fx, Q15, 128 ); + + st->hNoiseEst->Etot_lp_fx = float_to_fix16( st->hNoiseEst->Etot_lp, Q8 ); + st->hNoiseEst->Etot_v_h2_fx = float_to_fix16( st->hNoiseEst->Etot_v_h2, Q8 ); + st->hNoiseEst->Etot_l_lp_fx = float_to_fix16( st->hNoiseEst->Etot_l_lp, Q8 ); + st->hNoiseEst->Etot_st_est_fx = float_to_fix16( st->hNoiseEst->Etot_st_est, Q8 ); + st->hNoiseEst->Etot_sq_st_est_fx = float_to_fix16( st->hNoiseEst->Etot_sq_st_est, Q2 ); + st->hNoiseEst->multi_harm_limit_fx = float_to_fix16( st->hNoiseEst->multi_harm_limit, Q9 ); + st->hNoiseEst->totalNoise_fx = float_to_fix16( st->hNoiseEst->totalNoise, Q8 ); + st->hNoiseEst->noise_char_fx = float_to_fix16( st->hNoiseEst->noise_char, Q11 ); + st->hNoiseEst->epsP_0_2_lp_fx = float_to_fix16( st->hNoiseEst->epsP_0_2_lp, Q12 ); + 
st->hNoiseEst->epsP_0_2_ad_lp_fx = float_to_fix16( st->hNoiseEst->epsP_0_2_ad_lp, Q12 ); + st->hNoiseEst->epsP_2_16_lp_fx = float_to_fix16( st->hNoiseEst->epsP_2_16_lp, Q12 ); + st->hNoiseEst->epsP_2_16_lp2_fx = float_to_fix16( st->hNoiseEst->epsP_2_16_lp2, Q12 ); + st->hNoiseEst->epsP_2_16_dlp_lp2_fx = float_to_fix16( st->hNoiseEst->epsP_2_16_dlp_lp2, Q12 ); + st->hNoiseEst->lt_tn_track_fx = float_to_fix16( st->hNoiseEst->lt_tn_track, Q15 ); + st->hNoiseEst->lt_tn_dist_fx = float_to_fix16( st->hNoiseEst->lt_tn_dist, Q8 ); + st->hNoiseEst->lt_haco_ev_fx = float_to_fix16( st->hNoiseEst->lt_haco_ev, Q15 ); + st->hNoiseEst->lt_Ellp_dist_fx = float_to_fix16( st->hNoiseEst->lt_Ellp_dist, Q8 ); + st->hNoiseEst->sign_dyn_lp_fx = float_to_fix16( st->hNoiseEst->sign_dyn_lp, Q8 ); + st->hNoiseEst->act_pred_fx = float_to_fix16( st->hNoiseEst->act_pred, Q15 ); + st->hNoiseEst->lt_aEn_zero_fx = float_to_fix16( st->hNoiseEst->lt_aEn_zero, Q15 ); + + if ( lr_vad_enabled && st->idchan == 0 ) + { + for ( int j = 0; j < 2; j++ ) + { + floatToFixed_arr16( hCPE->hFrontVad[j]->hNoiseEst->old_S, hCPE->hFrontVad[j]->hNoiseEst->old_S_fx, Q7, 128 ); + floatToFixed_arr16( hCPE->hFrontVad[j]->hNoiseEst->cor_map, hCPE->hFrontVad[j]->hNoiseEst->cor_map_fx, Q15, 128 ); + + hCPE->hFrontVad[j]->hNoiseEst->Etot_lp_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->Etot_lp, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->Etot_v_h2_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->Etot_v_h2, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->Etot_l_lp_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->Etot_l_lp, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->Etot_st_est_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->Etot_st_est, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->Etot_sq_st_est_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->Etot_sq_st_est, Q2 ); + hCPE->hFrontVad[j]->hNoiseEst->multi_harm_limit_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->multi_harm_limit, Q9 ); + 
hCPE->hFrontVad[j]->hNoiseEst->totalNoise_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->totalNoise, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->noise_char_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->noise_char, Q11 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_0_2_lp_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->epsP_0_2_lp, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_0_2_ad_lp_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->epsP_0_2_ad_lp, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_lp_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_lp, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_lp2_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_lp2, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_dlp_lp2_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_dlp_lp2, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_tn_track_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->lt_tn_track, Q15 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_tn_dist_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->lt_tn_dist, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_haco_ev_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->lt_haco_ev, Q15 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_Ellp_dist_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->lt_Ellp_dist, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->sign_dyn_lp_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->sign_dyn_lp, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->act_pred_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->act_pred, Q15 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_aEn_zero_fx = float_to_fix16( hCPE->hFrontVad[j]->hNoiseEst->lt_aEn_zero, Q15 ); + + hCPE->hFrontVad[j]->lp_speech_fx = float_to_fix16( hCPE->hFrontVad[j]->lp_speech, Q8 ); + + Etot_LR_fx[j] = float_to_fix16( Etot_LR[j], Q8 ); + } + } + + if ( st->hSC_VBR != NULL ) + { + st->hSC_VBR->vadnoise_fx = float_to_fix16( st->hSC_VBR->vadnoise, Q8 ); + } + st->lp_noise_fx = float_to_fix16( st->lp_noise, Q8 ); + st->hNoiseEst->Etot_last_fx = 
float_to_fix16( st->hNoiseEst->Etot_last, Q8 ); + + floatToFixed_arr32( st->old_enr_ssf, st->old_enr_ssf_fx, 0, 2 * NB_SSF ); #endif // IVAS_FLOAT_FIXED_CONVERSIONS /* Detection of very short stable pitch period */ - StableHighPitchDetect_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, q_wsp, st->lgBin_E_fx ); + StableHighPitchDetect_ivas_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, q_wsp, st->lgBin_E_fx ); /* 1/4 pitch precision improvement */ IF( LE_32( element_brate, IVAS_32k ) ) @@ -2107,18 +2359,165 @@ ivas_error pre_proc_front_ivas_fx( move16(); } + /*------------------------------------------------------------------* + * Update estimated noise energy and voicing cut-off frequency + *-----------------------------------------------------------------*/ + + noise_est_ivas_fx( st, old_pitch1, tmpN_fx, epsP_h, epsP_l, Etot16_fx, relE_fx, corr_shift_fx, tmpE_fx, fr_bands_fx, &cor_map_sum16_fx, &ncharX_fx, &sp_div16_fx, &q_sp_div, + &non_staX16_fx, loc_harm, lf_E_fx, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp_fx, st->hNoiseEst->Etot_v_h2_fx, &st->hNoiseEst->bg_cnt, st->lgBin_E_fx, sub( q_fr_bands, QSCALE ), L_shl( E_MIN_IVAS_FX, sub( q_fr_bands, Q19 ) ), &dummy_fx, S_map_fx, hStereoClassif, NULL, st->ini_frame ); + + test(); + IF( lr_vad_enabled && st->idchan == 0 ) + { + /* Run noise_est for Left and Right channel */ + *loc_harmLR_fx = *loc_harm; + noise_est_ivas_fx( st, old_pitch1, tmpN_LR_fx[0], epsP_h, epsP_l, Etot_LR_fx[0], sub( Etot_LR_fx[0], hCPE->hFrontVad[0]->lp_speech_fx ), corr_shiftL_fx, tmpE_LR_fx[0], fr_bands_LR_fx[0], &cor_map_sum_LR_fx[0], &ncharX_LR_fx, &sp_div_LR_fx, &q_sp_div_LR, + &non_staX_LR_fx, loc_harmLR_fx, lf_E_LR_fx[0], 
&hCPE->hFrontVad[0]->hNoiseEst->harm_cor_cnt, hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp_fx, hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[0]->hNoiseEst->bg_cnt, st->lgBin_E_fx, sub( q_fr_bands, QSCALE ), L_shl( E_MIN_IVAS_FX, sub( q_fr_bands, Q19 ) ), &dummy_fx, S_map_LR_fx, NULL, hCPE->hFrontVad[0], hCPE->hFrontVad[0]->ini_frame ); + + /* Note: the index [0] in the last argument is intended, the ini_frame counter is only maintained in the zero-th channel's VAD handle */ + noise_est_ivas_fx( st, old_pitch1, tmpN_LR_fx[1], epsP_h, epsP_l, Etot_LR_fx[1], sub( Etot_LR_fx[1], hCPE->hFrontVad[1]->lp_speech_fx ), corr_shiftR_fx, tmpE_LR_fx[1], fr_bands_LR_fx[1], &cor_map_sum_LR_fx[1], &ncharX_LR_fx, &sp_div_LR_fx, &q_sp_div_LR, + &non_staX_LR_fx, loc_harmLR_fx, lf_E_LR_fx[1], &hCPE->hFrontVad[1]->hNoiseEst->harm_cor_cnt, hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp_fx, hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[1]->hNoiseEst->bg_cnt, st->lgBin_E_fx, sub( q_fr_bands, QSCALE ), L_shl( E_MIN_IVAS_FX, sub( q_fr_bands, Q19 ) ), &dummy_fx, S_map_LR_fx, NULL, hCPE->hFrontVad[1], hCPE->hFrontVad[0]->ini_frame ); + } + + /*------------------------------------------------------------------* + * Update parameters used in the VAD and DTX + *-----------------------------------------------------------------*/ + vad_param_updt_fx( st, old_pitch1, corr_shift_fx, corr_shift_fx, A_fx, NULL, 1 ); + + IF( lr_vad_enabled && st->idchan == 0 ) + { + vad_param_updt_fx( st, old_pitch1, corr_shiftL_fx, corr_shiftR_fx, A_fx, &hCPE->hFrontVad[0], CPE_CHANNELS ); + } + /*-----------------------------------------------------------------* + * Find spectral tilt + * UC and VC frame selection + *-----------------------------------------------------------------*/ + find_tilt_ivas_fx( fr_bands_fx, st->hNoiseEst->bckr_fx, ee_fx, st->pitch, st->voicing_fx, lf_E_fx, corr_shift_fx, st->input_bwidth, st->max_band, hp_E_fx, MODE1, q_fr_bands, &( st->bckr_tilt_lt ), st->Opt_SC_VBR 
); + + st->coder_type = find_uv_ivas_fx( st, pitch_fr_fx, voicing_fr_fx, inp_12k8_fx, ee_fx, &dE1X_fx, corr_shift_fx, relE_fx, Etot16_fx, hp_E_fx, &flag_spitch, last_core_orig, hStereoClassif, q_inp_12k8, q_fr_bands ); + #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - st->diff_sm = fixedToFloat( st->diff_sm_fx, Q7 ); - st->energy_sm = fixedToFloat( st->energy_sm_fx, Q7 ); - st->LF_EnergyRatio_sm = fixedToFloat_16( st->LF_EnergyRatio_sm_fx, Q7 ); + st->diff_sm = fixedToFloat_32( st->diff_sm_fx, Q7 ); + st->energy_sm = fixedToFloat_32( st->energy_sm_fx, Q7 ); + st->voicing_sm = fixedToFloat_16( st->voicing_sm_fx, Q15 ); st->voicing0_sm = fixedToFloat_16( st->voicing0_sm_fx, Q15 ); + st->LF_EnergyRatio_sm = fixedToFloat_16( st->LF_EnergyRatio_sm_fx, Q7 ); - fixedToFloat_arr( voicing_fr_fx, voicing_fr, Q15, NB_SUBFR ); fixedToFloat_arr( pitch_fr_fx, pitch_fr, Q6, NB_SUBFR ); + fixedToFloat_arr( voicing_fr_fx, voicing_fr, Q15, NB_SUBFR ); -#endif // IVAS_FLOAT_FIXED_CONVERSIONS + st->hVAD->running_avg = fixedToFloat_16( st->hVAD->running_avg_fx, Q15 ); + st->hVAD->ra_deltasum = fixedToFloat_16( st->hVAD->ra_deltasum_fx, Q15 ); + + if ( lr_vad_enabled && st->idchan == 0 ) + { + hCPE->hFrontVad[0]->hVAD->running_avg = fixedToFloat_16( hCPE->hFrontVad[0]->hVAD->running_avg_fx, Q15 ); + hCPE->hFrontVad[0]->hVAD->ra_deltasum = fixedToFloat_16( hCPE->hFrontVad[0]->hVAD->ra_deltasum_fx, Q15 ); + hCPE->hFrontVad[1]->hVAD->running_avg = fixedToFloat_16( hCPE->hFrontVad[1]->hVAD->running_avg_fx, Q15 ); + hCPE->hFrontVad[1]->hVAD->ra_deltasum = fixedToFloat_16( hCPE->hFrontVad[1]->hVAD->ra_deltasum_fx, Q15 ); + } + + fixedToFloat_arrL32( ee_fx, ee, Q6, 2 ); + fixedToFloat_arrL32( hp_E_fx, hp_E, q_fr_bands, 2 ); + st->bckr_tilt_lt_flt = fixedToFloat_32( st->bckr_tilt_lt, Q16 ); + + if ( hStereoClassif != NULL ) + { + hStereoClassif->dE1_ch1 = fixedToFloat_32( hStereoClassif->dE1_ch1_fx, 31 - hStereoClassif->dE1_ch1_e ); + hStereoClassif->dE1_ch2 = fixedToFloat_32( hStereoClassif->dE1_ch2_fx, 
31 - hStereoClassif->dE1_ch2_e ); + hStereoClassif->nchar_ch1 = fixedToFloat_32( hStereoClassif->nchar_ch1_fx, 31 - hStereoClassif->nchar_ch1_e ); + hStereoClassif->nchar_ch2 = fixedToFloat_32( hStereoClassif->nchar_ch2_fx, 31 - hStereoClassif->nchar_ch2_e ); + } + fixedToFloat_arrL32( st->old_enr_ssf_fx, st->old_enr_ssf, 0, 2 * NB_SSF ); + + non_staX = fixedToFloat_16( non_staX16_fx, Q8 ); + sp_div = fixedToFloat_16( sp_div16_fx, q_sp_div ); + *cor_map_sum = fixedToFloat_16( cor_map_sum16_fx, Q8 ); + + fixedToFloat_arr( S_map_fx, S_map, Q7, L_FFT / 2 ); + fixedToFloat_arr( st->lgBin_E_fx, st->Bin_E, Q7, L_FFT / 2 ); + + if ( lr_vad_enabled && st->idchan == 0 ) + { + fixedToFloat_arr( cor_map_sum_LR_fx, cor_map_sum_LR, Q8, 2 ); + fixedToFloat_arr( S_map_LR_fx, S_map_LR, Q7, L_FFT / 2 ); + for ( int j = 0; j < 2; j++ ) + { + fixedToFloat_arrL32( hCPE->hFrontVad[j]->hNoiseEst->bckr_fx, hCPE->hFrontVad[j]->hNoiseEst->bckr, q_fr_bands, NB_BANDS ); + fixedToFloat_arrL32( hCPE->hFrontVad[j]->hNoiseEst->fr_bands1_fx, hCPE->hFrontVad[j]->hNoiseEst->fr_bands1, q_fr_bands, NB_BANDS ); + fixedToFloat_arrL32( hCPE->hFrontVad[j]->hNoiseEst->fr_bands2_fx, hCPE->hFrontVad[j]->hNoiseEst->fr_bands2, q_fr_bands, NB_BANDS ); + fixedToFloat_arrL32( hCPE->hFrontVad[j]->hNoiseEst->ave_enr_fx, hCPE->hFrontVad[j]->hNoiseEst->ave_enr, q_fr_bands, NB_BANDS ); + fixedToFloat_arrL32( hCPE->hFrontVad[j]->hNoiseEst->ave_enr2_fx, hCPE->hFrontVad[j]->hNoiseEst->ave_enr2, q_fr_bands, NB_BANDS ); + } + } + fixedToFloat_arrL32( st->hNoiseEst->bckr_fx, st->hNoiseEst->bckr, q_fr_bands, NB_BANDS ); + fixedToFloat_arrL32( st->hNoiseEst->fr_bands1_fx, st->hNoiseEst->fr_bands1, q_fr_bands, NB_BANDS ); + fixedToFloat_arrL32( st->hNoiseEst->fr_bands2_fx, st->hNoiseEst->fr_bands2, q_fr_bands, NB_BANDS ); + fixedToFloat_arrL32( st->hNoiseEst->ave_enr_fx, st->hNoiseEst->ave_enr, q_fr_bands, NB_BANDS ); + fixedToFloat_arrL32( st->hNoiseEst->ave_enr2_fx, st->hNoiseEst->ave_enr2, q_fr_bands, NB_BANDS ); + + 
fixedToFloat_arr( st->hSpMusClas->past_log_enr_fx, st->hSpMusClas->past_log_enr, Q8, NB_BANDS_SPMUS ); + st->hSpMusClas->ener_RAT = fixedToFloat_16( st->hSpMusClas->ener_RAT_fx, Q15 ); + st->hSpMusClas->mean_avr_dyn = fixedToFloat_16( st->hSpMusClas->mean_avr_dyn_fx, Q7 ); + st->hSpMusClas->last_sw_dyn = fixedToFloat_16( st->hSpMusClas->last_sw_dyn_fx, Q7 ); + + fixedToFloat_arr( st->hNoiseEst->old_S_fx, st->hNoiseEst->old_S, Q7, 128 ); + fixedToFloat_arr( st->hNoiseEst->cor_map_fx, st->hNoiseEst->cor_map, Q15, 128 ); + + st->hNoiseEst->Etot_lp = fixedToFloat_16( st->hNoiseEst->Etot_lp_fx, Q8 ); + st->hNoiseEst->Etot_v_h2 = fixedToFloat_16( st->hNoiseEst->Etot_v_h2_fx, Q8 ); + st->hNoiseEst->Etot_l_lp = fixedToFloat_16( st->hNoiseEst->Etot_l_lp_fx, Q8 ); + st->hNoiseEst->multi_harm_limit = fixedToFloat_16( st->hNoiseEst->multi_harm_limit_fx, Q9 ); + st->hNoiseEst->totalNoise = fixedToFloat_16( st->hNoiseEst->totalNoise_fx, Q8 ); + st->hNoiseEst->noise_char = fixedToFloat_16( st->hNoiseEst->noise_char_fx, Q11 ); + st->hNoiseEst->epsP_0_2_lp = fixedToFloat_16( st->hNoiseEst->epsP_0_2_lp_fx, Q12 ); + st->hNoiseEst->epsP_0_2_ad_lp = fixedToFloat_16( st->hNoiseEst->epsP_0_2_ad_lp_fx, Q12 ); + st->hNoiseEst->epsP_2_16_lp = fixedToFloat_16( st->hNoiseEst->epsP_2_16_lp_fx, Q12 ); + st->hNoiseEst->epsP_2_16_lp2 = fixedToFloat_16( st->hNoiseEst->epsP_2_16_lp2_fx, Q12 ); + st->hNoiseEst->epsP_2_16_dlp_lp2 = fixedToFloat_16( st->hNoiseEst->epsP_2_16_dlp_lp2_fx, Q12 ); + st->hNoiseEst->lt_tn_track = fixedToFloat_16( st->hNoiseEst->lt_tn_track_fx, Q15 ); + st->hNoiseEst->lt_tn_dist = fixedToFloat_16( st->hNoiseEst->lt_tn_dist_fx, Q8 ); + st->hNoiseEst->lt_haco_ev = fixedToFloat_16( st->hNoiseEst->lt_haco_ev_fx, Q15 ); + st->hNoiseEst->lt_Ellp_dist = fixedToFloat_16( st->hNoiseEst->lt_Ellp_dist_fx, Q8 ); + st->hNoiseEst->sign_dyn_lp = fixedToFloat_16( st->hNoiseEst->sign_dyn_lp_fx, Q8 ); + st->hNoiseEst->act_pred = fixedToFloat_16( st->hNoiseEst->act_pred_fx, Q15 ); + 
st->hNoiseEst->lt_aEn_zero = fixedToFloat_16( st->hNoiseEst->lt_aEn_zero_fx, Q15 ); + st->hNoiseEst->Etot_sq_st_est = fixedToFloat_16( st->hNoiseEst->Etot_sq_st_est_fx, Q2 ); + st->hNoiseEst->Etot_st_est = fixedToFloat_16( st->hNoiseEst->Etot_st_est_fx, Q8 ); + + if ( lr_vad_enabled && st->idchan == 0 ) + { + for ( int j = 0; j < 2; j++ ) + { + fixedToFloat_arr( hCPE->hFrontVad[j]->hNoiseEst->old_S_fx, hCPE->hFrontVad[j]->hNoiseEst->old_S, Q7, 128 ); + fixedToFloat_arr( hCPE->hFrontVad[j]->hNoiseEst->cor_map_fx, hCPE->hFrontVad[j]->hNoiseEst->cor_map, Q15, 128 ); + + hCPE->hFrontVad[j]->hNoiseEst->Etot_lp = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->Etot_lp_fx, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->Etot_v_h2 = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->Etot_v_h2_fx, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->Etot_l_lp = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->Etot_l_lp_fx, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->multi_harm_limit = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->multi_harm_limit_fx, Q9 ); + hCPE->hFrontVad[j]->hNoiseEst->totalNoise = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->totalNoise_fx, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->noise_char = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->noise_char_fx, Q11 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_0_2_lp = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->epsP_0_2_lp_fx, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_0_2_ad_lp = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->epsP_0_2_ad_lp_fx, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_lp = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_lp_fx, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_lp2 = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_lp2_fx, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_dlp_lp2 = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->epsP_2_16_dlp_lp2_fx, Q12 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_tn_track = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->lt_tn_track_fx, Q15 ); + 
hCPE->hFrontVad[j]->hNoiseEst->lt_tn_dist = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->lt_tn_dist_fx, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_haco_ev = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->lt_haco_ev_fx, Q15 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_Ellp_dist = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->lt_Ellp_dist_fx, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->sign_dyn_lp = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->sign_dyn_lp_fx, Q8 ); + hCPE->hFrontVad[j]->hNoiseEst->act_pred = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->act_pred_fx, Q15 ); + hCPE->hFrontVad[j]->hNoiseEst->lt_aEn_zero = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->lt_aEn_zero_fx, Q15 ); + hCPE->hFrontVad[j]->hNoiseEst->Etot_sq_st_est = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->Etot_sq_st_est_fx, Q2 ); + hCPE->hFrontVad[j]->hNoiseEst->Etot_st_est = fixedToFloat_16( hCPE->hFrontVad[j]->hNoiseEst->Etot_st_est_fx, Q8 ); + } + } + +#endif #else /* Detection of very short stable pitch period */ @@ -2144,7 +2543,6 @@ ivas_error pre_proc_front_ivas_fx( voicing_fr[2] = st->voicing[1]; voicing_fr[3] = st->voicing[1]; } -#endif // IVAS_FLOAT_FIXED /*------------------------------------------------------------------* * Update estimated noise energy and voicing cut-off frequency @@ -2184,6 +2582,7 @@ ivas_error pre_proc_front_ivas_fx( find_tilt( fr_bands, st->hNoiseEst->bckr, ee, st->pitch, st->voicing, lf_E, corr_shift, st->input_bwidth, st->max_band, hp_E, MODE1, &( st->bckr_tilt_lt_flt ), st->Opt_SC_VBR ); st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, &dE1X, corr_shift, *relE, Etot, hp_E, &flag_spitch, last_core_orig, hStereoClassif ); +#endif // IVAS_FLOAT_FIXED /*-----------------------------------------------------------------* * channel aware mode configuration * @@ -2302,7 +2701,6 @@ ivas_error pre_proc_front_ivas_fx( long_enr( st, Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL ); #else #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word16 
Etot16_fx; // conv params from float to fix Etot_fx = float_to_fix( Etot, 8 ); Etot16_fx = extract_l( Etot_fx ); @@ -2525,7 +2923,6 @@ ivas_error pre_proc_front_ivas_fx( #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS #define ATT_SEG_LEN ( L_FRAME / ATT_NSEG ) - Word16 relE_fx; relE_fx = float_to_fix16( *relE, Q8 ); Etot16_fx = float_to_fix16( Etot, Q8 ); // Q_new = s_min(Q_factor_arr( inp_12k8, ATT_NSEG * ATT_SEG_LEN ), Q_factor_arr( st->Bin_E, 256 )) - 1; diff --git a/lib_enc/ivas_corecoder_enc_reconfig.c b/lib_enc/ivas_corecoder_enc_reconfig.c index 1b2371267f76fcec360e685580a758af34730936..bad7375f5479a781feed11182c054ee8f1392cb7 100644 --- a/lib_enc/ivas_corecoder_enc_reconfig.c +++ b/lib_enc/ivas_corecoder_enc_reconfig.c @@ -212,7 +212,7 @@ ivas_error ivas_corecoder_enc_reconfig( mvr2r( st_ivas->hSCE[sce_id]->hCoreCoder[0]->input_buff, input_buff[sce_id], len_inp_memory ); } - destroy_sce_enc( st_ivas->hSCE[sce_id], ( EQ_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) && !st_ivas->hEncoderConfig->stereo_dmx_evs ) ); + destroy_sce_enc( st_ivas->hSCE[sce_id], EQ_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) ); st_ivas->hSCE[sce_id] = NULL; } diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index f6796681534744f76a3d7beda5a13e1f7f95d099..e35be41a86d1df7d3508acbdfc92d000e5b7c475 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -227,7 +227,8 @@ ivas_error ivas_cpe_enc( floatToFixed_arr( sts[n]->input, sts[n]->input_fx, Q_inp, L_FRAME48k ); } } - if ( ( error = front_vad_fx( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_LR, NULL, NULL, Q_inp ) ) != IVAS_ERR_OK ) + Word16 Q_add = 2; + if ( ( error = front_vad_fx( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, vad_hover_flag, 
band_energies_LR, NULL, NULL, Q_inp, Q_add ) ) != IVAS_ERR_OK ) { return error; } @@ -572,10 +573,131 @@ ivas_error ivas_cpe_enc( } else if ( hCPE->element_mode == IVAS_CPE_TD ) { +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + floatToFixed_arr16( sts[0]->input, sts[0]->input_fx, 0, input_frame ); + floatToFixed_arr16( sts[1]->input, sts[1]->input_fx, 0, input_frame ); + + hCPE->hStereoTD->tdm_lt_rms_L_fx = floatToFixed_32( hCPE->hStereoTD->tdm_lt_rms_L, Q16 ); + hCPE->hStereoTD->tdm_lt_rms_R_fx = floatToFixed_32( hCPE->hStereoTD->tdm_lt_rms_R, Q16 ); + hCPE->hStereoTD->tdm_last_ener_lt_L_fx = floatToFixed_32( hCPE->hStereoTD->tdm_last_ener_lt_L, Q16 ); + hCPE->hStereoTD->tdm_last_ener_lt_R_fx = floatToFixed_32( hCPE->hStereoTD->tdm_last_ener_lt_R, Q16 ); + hCPE->hStereoTD->tdm_LT_es_em_fx = floatToFixed_32( hCPE->hStereoTD->tdm_LT_es_em, Q21 ); + sts[0]->hNoiseEst->Etot_last_fx = float_to_fix16( sts[0]->hNoiseEst->Etot_last, Q8 ); + sts[1]->hNoiseEst->Etot_last_fx = float_to_fix16( sts[1]->hNoiseEst->Etot_last, Q8 ); + sts[0]->old_corr_fx = float_to_fix16( sts[0]->old_corr, Q15 ); + sts[1]->old_corr_fx = float_to_fix16( sts[1]->old_corr, Q15 ); + + sts[0]->ee_old_fx = floatToFixed( sts[0]->ee_old, Q6 ); + sts[1]->ee_old_fx = floatToFixed( sts[1]->ee_old, Q6 ); + hCPE->hStereoTD->tdm_last_ratio_fx = floatToFixed( hCPE->hStereoTD->tdm_last_ratio, Q31 ); + + hCPE->hStereoTD->tdm_lt_corr_LM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_lt_corr_LM, Q24 ); + hCPE->hStereoTD->tdm_lt_corr_RM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_lt_corr_RM, Q24 ); + hCPE->hStereoTD->q_tdm_last_diff_lt_corr = Q31; + hCPE->hStereoTD->tdm_last_diff_lt_corr_fx = floatToFixed_32( hCPE->hStereoTD->tdm_last_diff_lt_corr, hCPE->hStereoTD->q_tdm_last_diff_lt_corr ); + floatToFixed_arrL( hCPE->hStereoClassif->unclr_fv, hCPE->hStereoClassif->unclr_fv_fx, Q15, 58 ); + floatToFixed_arrL( hCPE->hStereoClassif->xtalk_fv, hCPE->hStereoClassif->xtalk_fv_fx, Q15, SSC_MAX_NFEA ); + floatToFixed_arrL( 
hCPE->hStereoClassif->unclr_relE_0_1_LT, hCPE->hStereoClassif->unclr_relE_0_1_LT_fx, Q31, UNCLR_RC_ORDER ); + floatToFixed_arrL( hCPE->hStereoClassif->xtalk_score_buf, hCPE->hStereoClassif->xtalk_score_buf_fx, Q31, XTALK_SCORE_BUF_LEN ); + for ( int i = 0; i < XTALK_SCORE_BUF_LEN; i++ ) + { + Word64 temp = (Word64) ( hCPE->hStereoClassif->xtalk_score_buf[i] * ONE_IN_Q31 ); + if ( temp - 1 == MAX_32 ) + { + hCPE->hStereoClassif->xtalk_score_buf_fx[i] = MAX_32; + } + else if ( temp > MAX_32 || temp < MIN_32 ) + { + assert( 0 ); + } + else + { + hCPE->hStereoClassif->xtalk_score_buf_fx[i] = (Word32) temp; + } + } + hCPE->hStereoClassif->ratio_L_fx = floatToFixed_32( hCPE->hStereoClassif->ratio_L, Q31 ); + hCPE->hStereoClassif->relE_0_1_fx = floatToFixed_32( hCPE->hStereoClassif->relE_0_1, Q31 ); + hCPE->hStereoClassif->unclr_wscore_fx = floatToFixed_32( hCPE->hStereoClassif->unclr_wscore, Q31 ); + hCPE->hStereoClassif->xtalk_score_fx = floatToFixed_32( hCPE->hStereoClassif->xtalk_score, Q31 ); + hCPE->hStereoClassif->xtalk_wscore_fx = floatToFixed_32( hCPE->hStereoClassif->xtalk_wscore, Q31 ); + hCPE->hStereoClassif->xtalk_score_wrelE_fx = floatToFixed_32( hCPE->hStereoClassif->xtalk_score_wrelE, Q31 ); + IF( hCPE->hStereoTD->tdm_last_SM_flag ) + { + IF( hCPE->hStereoTD->tdm_SM_reset_flag ) + { + hCPE->hStereoTD->tdm_lt_corr_RM_SM = 0.01f; + hCPE->hStereoTD->tdm_lt_corr_LM_SM = 0.01f; + hCPE->hStereoTD->tdm_last_ratio_SM = hCPE->hStereoTD->tdm_last_ratio; + + hCPE->hStereoTD->tdm_lt_rms_L_SM = 40.0f; + hCPE->hStereoTD->tdm_lt_rms_R_SM = 40.0f; + hCPE->hStereoTD->tdm_last_diff_lt_corr_SM = 0; + hCPE->hStereoTD->tdm_last_ener_lt_R_SM = 0; + hCPE->hStereoTD->tdm_last_ener_lt_L_SM = 0; + hCPE->hStereoTD->tdm_LT_es_em_SM = 0.1f; + } + floatToFixed_arr16( sts[0]->input, sts[0]->input_fx, 0, input_frame ); + floatToFixed_arr16( sts[1]->input, sts[1]->input_fx, 0, input_frame ); + + hCPE->hStereoTD->tdm_lt_rms_L_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_lt_rms_L_SM, Q16 
); + hCPE->hStereoTD->tdm_lt_rms_R_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_lt_rms_R_SM, Q16 ); + hCPE->hStereoTD->tdm_last_ener_lt_L_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_last_ener_lt_L_SM, Q16 ); + hCPE->hStereoTD->tdm_last_ener_lt_R_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_last_ener_lt_R_SM, Q16 ); + hCPE->hStereoTD->tdm_LT_es_em_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_LT_es_em_SM, Q21 ); + sts[0]->hNoiseEst->Etot_last_fx = float_to_fix16( sts[0]->hNoiseEst->Etot_last, Q8 ); + sts[1]->hNoiseEst->Etot_last_fx = float_to_fix16( sts[1]->hNoiseEst->Etot_last, Q8 ); + sts[0]->old_corr_fx = float_to_fix16( sts[0]->old_corr, Q15 ); + sts[1]->old_corr_fx = float_to_fix16( sts[1]->old_corr, Q15 ); + + hCPE->hStereoTD->tdm_lt_corr_LM_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_lt_corr_LM_SM, Q24 ); + hCPE->hStereoTD->tdm_lt_corr_RM_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_lt_corr_RM_SM, Q24 ); + hCPE->hStereoTD->q_tdm_last_diff_lt_corr_SM = Q31; + hCPE->hStereoTD->tdm_last_diff_lt_corr_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_last_diff_lt_corr_SM, hCPE->hStereoTD->q_tdm_last_diff_lt_corr_SM ); + + hCPE->hStereoTD->tdm_last_ratio_SM_fx = floatToFixed_32( hCPE->hStereoTD->tdm_last_ratio_SM, Q31 ); + } +#endif /* Determine the energy ratio between the 2 channels */ - tdm_ratio_idx = stereo_tdm_ener_analysis( + tdm_ratio_idx = stereo_tdm_ener_analysis_fx( ivas_format, hCPE, input_frame, &tdm_SM_or_LRTD_Pri, &tdm_ratio_idx_SM ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + hCPE->hStereoTD->tdm_lt_rms_L = fixedToFloat_32( hCPE->hStereoTD->tdm_lt_rms_L_fx, Q16 ); + hCPE->hStereoTD->tdm_lt_rms_R = fixedToFloat_32( hCPE->hStereoTD->tdm_lt_rms_R_fx, Q16 ); + hCPE->hStereoTD->tdm_last_ener_lt_L = fixedToFloat_32( hCPE->hStereoTD->tdm_last_ener_lt_L_fx, Q16 ); + hCPE->hStereoTD->tdm_last_ener_lt_R = fixedToFloat_32( hCPE->hStereoTD->tdm_last_ener_lt_R_fx, Q16 ); + hCPE->hStereoTD->tdm_LT_es_em = fixedToFloat_32( hCPE->hStereoTD->tdm_LT_es_em_fx, Q21 ); + 
sts[0]->hNoiseEst->Etot_last = fix16_to_float( sts[0]->hNoiseEst->Etot_last_fx, Q8 ); + sts[1]->hNoiseEst->Etot_last = fix16_to_float( sts[1]->hNoiseEst->Etot_last_fx, Q8 ); + hCPE->hStereoClassif->xtalk_fv[E_diff_corrLM_corrRM] = fixedToFloat( hCPE->hStereoClassif->xtalk_fv_fx[E_diff_corrLM_corrRM], Q15 ); + hCPE->hStereoClassif->xtalk_fv[E_tdm_LT_es_em] = fixedToFloat( hCPE->hStereoClassif->xtalk_fv_fx[E_tdm_LT_es_em], Q15 ); + + hCPE->hStereoTD->tdm_lt_corr_LM = fixedToFloat_32( hCPE->hStereoTD->tdm_lt_corr_LM_fx, Q24 ); + hCPE->hStereoTD->tdm_lt_corr_RM = fixedToFloat_32( hCPE->hStereoTD->tdm_lt_corr_RM_fx, Q24 ); + hCPE->hStereoTD->tdm_last_diff_lt_corr = fixedToFloat_32( hCPE->hStereoTD->tdm_last_diff_lt_corr_fx, hCPE->hStereoTD->q_tdm_last_diff_lt_corr ); + hCPE->hStereoClassif->ratio_L = fixedToFloat_32( hCPE->hStereoClassif->ratio_L_fx, Q31 ); + hCPE->hStereoClassif->relE_0_1 = fixedToFloat_32( hCPE->hStereoClassif->relE_0_1_fx, Q31 ); + hCPE->hStereoClassif->unclr_wscore = fixedToFloat_32( hCPE->hStereoClassif->unclr_wscore_fx, Q31 ); + fixedToFloat_arrL( hCPE->hStereoClassif->unclr_fv_fx, hCPE->hStereoClassif->unclr_fv, Q15, 58 ); + fixedToFloat_arrL( hCPE->hStereoClassif->xtalk_fv_fx, hCPE->hStereoClassif->xtalk_fv, Q15, SSC_MAX_NFEA ); + fixedToFloat_arrL( hCPE->hStereoClassif->unclr_relE_0_1_LT_fx, hCPE->hStereoClassif->unclr_relE_0_1_LT, Q31, UNCLR_RC_ORDER ); + fixedToFloat_arrL( hCPE->hStereoClassif->xtalk_score_buf_fx, hCPE->hStereoClassif->xtalk_score_buf, Q31, XTALK_SCORE_BUF_LEN ); + hCPE->hStereoClassif->xtalk_score = fixedToFloat_32( hCPE->hStereoClassif->xtalk_score_fx, Q31 ); + hCPE->hStereoClassif->xtalk_wscore = fixedToFloat_32( hCPE->hStereoClassif->xtalk_wscore_fx, Q31 ); + hCPE->hStereoClassif->xtalk_score_wrelE = fixedToFloat_32( hCPE->hStereoClassif->xtalk_score_wrelE_fx, Q31 ); + if ( hCPE->hStereoTD->tdm_last_SM_flag ) + { + hCPE->hStereoTD->tdm_lt_rms_L_SM = fixedToFloat_32( hCPE->hStereoTD->tdm_lt_rms_L_SM_fx, Q16 ); + 
hCPE->hStereoTD->tdm_lt_rms_R_SM = fixedToFloat_32( hCPE->hStereoTD->tdm_lt_rms_R_SM_fx, Q16 ); + hCPE->hStereoTD->tdm_last_ener_lt_L_SM = fixedToFloat_32( hCPE->hStereoTD->tdm_last_ener_lt_L_SM_fx, Q16 ); + hCPE->hStereoTD->tdm_last_ener_lt_R_SM = fixedToFloat_32( hCPE->hStereoTD->tdm_last_ener_lt_R_SM_fx, Q16 ); + hCPE->hStereoTD->tdm_LT_es_em_SM = fixedToFloat_32( hCPE->hStereoTD->tdm_LT_es_em_SM_fx, Q21 ); + + hCPE->hStereoTD->tdm_lt_corr_LM_SM = fixedToFloat_32( hCPE->hStereoTD->tdm_lt_corr_LM_SM_fx, Q24 ); + hCPE->hStereoTD->tdm_lt_corr_RM_SM = fixedToFloat_32( hCPE->hStereoTD->tdm_lt_corr_RM_SM_fx, Q24 ); + hCPE->hStereoTD->tdm_last_diff_lt_corr_SM = fixedToFloat_32( hCPE->hStereoTD->tdm_last_diff_lt_corr_SM_fx, hCPE->hStereoTD->q_tdm_last_diff_lt_corr_SM ); + } +#endif /* Compute the downmix signal based on the ratio index */ stereo_tdm_downmix( hCPE->hStereoTD, sts[0]->input, sts[1]->input, input_frame, tdm_ratio_idx, ( ( hCPE->hStereoTD->tdm_LRTD_flag == 0 ) ? tdm_SM_or_LRTD_Pri : 0 ), tdm_ratio_idx_SM ); diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index 7c4c8379f7b1f9ca56b06a4b6c2f56b9f52508fe..e466d2763ef90ce3c3fa7223b6281030a8cdd3e8 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -259,7 +259,8 @@ ivas_error front_vad_fx( float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN*/ float *PS_out, /* o : energy spectrum */ float *Bin_E_out, /* o : log-energy spectrum of the current frame */ - Word16 Q_inp ) + Word16 Q_inp, + Word16 Q_add ) { ENC_CORE_HANDLE *sts; float band_energies[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */ @@ -273,7 +274,7 @@ ivas_error front_vad_fx( Word16 snr_sum_he_fx; float Bin_E[L_FFT]; /* per bin log energy spectrum for mid-frame */ Word32 fr_bands_fx[2][2 * NB_BANDS] = { { 0 } }; - Word32 fr_bands_cp_fx[2 * NB_BANDS] = { 0 }; + // Word32 fr_bands_cp_fx[2 * NB_BANDS] = { 0 }; Word32 Bin_E_fx[L_FFT] = { 0 
}; /* per bin log energy spectrum for mid-frame */ Word32 Bin_E_old_fx[L_FFT / 2] = { 0 }; /* old per bin log energy spectrum for mid-frame */ Word16 fft_buffLR_fx[2 * L_FFT]; /* fft buffer */ @@ -380,7 +381,7 @@ ivas_error front_vad_fx( hFrontVads[n]->mem_preemph_fx = (Word16) floatToFixed( hFrontVads[n]->mem_preemph, Q_inp ); }*/ Qband = -1; - Word16 Q_new_old = ( Q_inp + Qband ) + 3; + Word16 Q_new_old = ( Q_inp - Qband ) + Q_add; for ( n = 0; n < n_chan; n++ ) { floatToFixed_arr( hFrontVads[n]->mem_decim, hFrontVads[n]->mem_decim_fx, Q_inp, 90 ); @@ -399,8 +400,8 @@ ivas_error front_vad_fx( hFrontVads[n]->lp_speech_fx = (Word16) floatToFixed( hFrontVads[n]->lp_speech, Q8 ); sts[n]->lp_speech_fx = (Word16) floatToFixed( sts[n]->lp_speech, Q8 ); sts[n]->lp_noise_fx = (Word16) floatToFixed( sts[n]->lp_noise, Q8 ); - floatToFixed_arrL( hFrontVads[n]->hNoiseEst->bckr, hFrontVads[n]->hNoiseEst->bckr_fx, Q_new_old + QSCALE, 20 ); - floatToFixed_arrL( hFrontVads[n]->hNoiseEst->enrO, hFrontVads[n]->hNoiseEst->enrO_fx, Q_new_old + QSCALE, 20 ); + floatToFixed_arrL( hFrontVads[n]->hNoiseEst->bckr, hFrontVads[n]->hNoiseEst->bckr_fx, Q_new_old + QSCALE + 2, 20 ); + floatToFixed_arrL( hFrontVads[n]->hNoiseEst->enrO, hFrontVads[n]->hNoiseEst->enrO_fx, Q_new_old + QSCALE + 2, 20 ); sts[n]->flag_noisy_speech_snr_fx = (Word8) sts[n]->flag_noisy_speech_snr; hFrontVads[n]->hVAD->bcg_flux_fx = (Word16) hFrontVads[n]->hVAD->bcg_flux * ( 1 << 4 ); @@ -461,7 +462,7 @@ ivas_error front_vad_fx( analy_sp( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8 + L_FFT / 2 - 3 * ( L_SUBFR / 2 ), Bin_E, Bin_E_old, fr_bands[n], lf_E[n], &Etot_LR[n], sts[0]->min_band, sts[0]->max_band, band_energies, PS, fft_buffLR ); #else Word16 Scale_fac[2]; - Q_new = ( Q_inp + Qband ) + 3; + Q_new = ( Q_inp - Qband ); // if ( Q_buffer[n] < Q_inp + Qband ) //{ // Scale_sig( hFrontVad->buffer_12k8_fx, L_FFT / 2, Q_buffer[n] - Q_new ); @@ -499,9 +500,9 @@ ivas_error front_vad_fx( /* add up energies for 
later calculating average of channel energies */ // v_add( &band_energies[0], &band_energies_LR[0], &band_energies_LR[0], 2 * NB_BANDS ); // floatToFixed_arrL( &band_energies_LR[0], &band_energies_LR_fx[0], Q_new + QSCALE + 2, 40 ); - Scale_sig32( &band_energies_LR_fx[0], ( Q_new + QSCALE + 2 ) - ( Q_new_old + QSCALE + 2 - band_ener_guardbits ), 40 ); + // Scale_sig32( &band_energies_LR_fx[0], ( Q_new + QSCALE + 2 ) - ( Q_new_old + QSCALE + 2 ) - band_ener_guardbits , 40 ); // Scale_sig32( band_energies_fx,( Q_new + QSCALE + 2)-( Q_new_old + QSCALE + 2 ), 40 ); - Q_new_old = Q_new; + v_add_fixed( &band_energies_fx[0], &band_energies_LR_fx[0], &band_energies_LR_fx[0], 2 * NB_BANDS, band_ener_guardbits ); // fixedToFloat_arrL( &band_energies_LR_fx[0], &band_energies_LR[0], Q_new + QSCALE - band_ener_guardbits, 40 ); #if 0 @@ -534,13 +535,13 @@ ivas_error front_vad_fx( hFrontVad->hVAD->vad_flag = wb_vad( sts[n], fr_bands[n], &dummy, &dummy, &dummy, &snr_sum_he, &localVAD_HE_SAD[n], &dummy, hFrontVad->hVAD, hFrontVad->hNoiseEst, hFrontVad->lp_speech, hFrontVad->lp_noise ); #else // floatToFixed_arrL( fr_bands[n], fr_bands_fx[n], Q_new + QSCALE, 40 ); - for ( int i = 0; i < 40; i++ ) + /*for ( int i = 0; i < 40; i++ ) { fr_bands_cp_fx[i] = L_shr_sat( fr_bands_fx[n][i], 2 ); - } + }*/ // Scale_sig32( fr_bands_fx[n], -2, 40 ); - Scale_sig32( hFrontVads[n]->hNoiseEst->bckr_fx, Q_new + QSCALE - ( ( Q_inp - 1 ) + 3 + QSCALE ), 20 ); - Scale_sig32( hFrontVads[n]->hNoiseEst->enrO_fx, Q_new + QSCALE - ( ( Q_inp - 1 ) + 3 + QSCALE ), 20 ); + Scale_sig32( hFrontVads[n]->hNoiseEst->bckr_fx, Q_new + QSCALE - ( Q_new_old + QSCALE ), 20 ); + Scale_sig32( hFrontVads[n]->hNoiseEst->enrO_fx, Q_new + QSCALE - ( Q_new_old + QSCALE ), 20 ); /* hFrontVad->lp_noise_fx = (Word16) floatToFixed( hFrontVad->lp_noise, Q8 ); hFrontVad->lp_speech_fx = (Word16) floatToFixed( hFrontVad->lp_speech, Q8 ); sts[n]->lp_speech_fx = (Word16) floatToFixed( sts[n]->lp_speech, Q8 ); @@ -560,7 +561,7 @@ 
ivas_error front_vad_fx( hFrontVad->hVAD->prim_act_he_fx = (Word16) ( hFrontVad->hVAD->prim_act_he * 32767 );*/ hFrontVad->hNoiseEst->sign_dyn_lp_fx = extract_h( hFrontVad->hNoiseEst->sign_dyn_lp_32fx ); hFrontVad->hNoiseEst->Etot_v_h2_fx = extract_h( hFrontVad->hNoiseEst->Etot_v_h2_32fx ); - hFrontVad->hVAD->vad_flag = wb_vad_ivas_fx( sts[n], fr_bands_cp_fx, &dummy, &dummy, &dummy, &snr_sum_he_fx, &localVAD_HE_SAD[n], &dummy_short, Q_new, hFrontVad->hVAD, hFrontVad->hNoiseEst, hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ); + hFrontVad->hVAD->vad_flag = wb_vad_ivas_fx( sts[n], fr_bands_fx[n], &dummy, &dummy, &dummy, &snr_sum_he_fx, &localVAD_HE_SAD[n], &dummy_short, Q_new, hFrontVad->hVAD, hFrontVad->hNoiseEst, hFrontVad->lp_speech_fx, hFrontVad->lp_noise_fx ); // snr_sum_he = (float)(snr_sum_he_fx / ONE_IN_Q8 ); /* hFrontVad->hNoiseEst->Etot_v_h2 = (float) ( hFrontVad->hNoiseEst->Etot_v_h2_fx / ( 256.0 ) ); hFrontVad->hNoiseEst->sign_dyn_lp = (float) ( hFrontVad->hNoiseEst->sign_dyn_lp_fx / ( 256.0 ) ); @@ -572,6 +573,7 @@ ivas_error front_vad_fx( hFrontVad->hVAD->prim_act_quick_he = (float) ( hFrontVad->hVAD->prim_act_quick_he_fx / 32767.0 ); hFrontVad->hVAD->prim_act_slow_he = (float) ( hFrontVad->hVAD->prim_act_slow_he_fx / 32767.0 ); hFrontVad->hVAD->prim_act_he = (float) ( hFrontVad->hVAD->prim_act_he_fx / 32767.0 );*/ + Q_new_old = Q_new; #endif if ( n == 0 && n_chan > 1 && last_element_mode == IVAS_CPE_DFT ) { @@ -595,7 +597,7 @@ ivas_error front_vad_fx( hFrontVads[n]->mem_preemph = fixedToFloat( hFrontVads[n]->mem_preemph_fx, Q_inp + Q_band[n] ); fixedToFloat_arr( hFrontVads[n]->buffer_12k8_fx, hFrontVads[n]->buffer_12k8, Q_buffer[n], 384 ); fixedToFloat_arrL( fr_bands_fx[n], fr_bands[n], Q_buffer[n] + QSCALE + 2, 40 ); - fixedToFloat_arrL( lf_E_fx[n], lf_E[n], Q_buffer[n] + QSCALE - 2, 148 ); + fixedToFloat_arrL( lf_E_fx[n], lf_E[n], Q_buffer[n] + QSCALE, 148 ); if ( lgBin_E_fx != NULL ) { fixedToFloat_arr( lgBin_E_fx, Bin_E, Q7, 128 ); @@ -885,7 
+887,9 @@ ivas_error front_vad_spar( Word32 tmpN_fx[NB_BANDS] = { 0 }; Word32 tmpE_fx[NB_BANDS] = { 0 }; float corr_shift; - // float res_energy; + // float res_energy; + // float ncharX; + // float sp_div, dummy; Word16 corr_shift_fx; #if 1 @@ -1011,7 +1015,8 @@ ivas_error front_vad_spar( return error; } #else - if ( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands, Etot, lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies, &PS[0], &st->Bin_E[0], Q_inp ) ) != IVAS_ERR_OK ) + Word16 Q_add = 0; + if ( ( error = front_vad_fx( NULL, st, hEncoderConfig, &hFrontVad, 0 /* MCT_flag */, input_frame, vad_flag_dtx, fr_bands, Etot, lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies, &PS[0], &st->Bin_E[0], Q_inp, Q_add ) ) != IVAS_ERR_OK ) { return error; } @@ -1135,9 +1140,9 @@ ivas_error front_vad_spar( old_pitch = st->pitch[1]; #if 1 floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); - floatToFixed_arr( wsp, wsp_fx, Q9, 368 ); - floatToFixed_arr( st->old_wsp2, st->old_wsp2_fx, Q9, 115 ); - floatToFixed_arr( st->mem_decim2, st->mem_decim2_fx, Q9, 3 ); + floatToFixed_arr( wsp, wsp_fx, Q8, 368 ); + floatToFixed_arr( st->old_wsp2, st->old_wsp2_fx, Q8, 115 ); + floatToFixed_arr( st->mem_decim2, st->mem_decim2_fx, Q8, 3 ); // st->old_corr_fx = (Word16)floatToFixed( st->old_corr, Q15 ); corr_shift_fx = (Word16) floatToFixed( corr_shift, Q15 ); // st->old_thres_fx = (Word16) floatToFixed( st->old_thres, Q15 ); @@ -1147,8 +1152,8 @@ ivas_error front_vad_spar( // pitch_ol( st->pitch, st->voicing, &st->old_pitch, &st->old_corr, corr_shift, &st->old_thres, &st->delta_pit, st->old_wsp2, wsp, st->mem_decim2, relE, L_LOOK_12k8, st->clas, st->input_bwidth, st->Opt_SC_VBR ); fixedToFloat_arr( st->voicing_fx, st->voicing, Q15, 3 ); - fixedToFloat_arr( st->old_wsp2_fx, st->old_wsp2, Q9, 115 ); - fixedToFloat_arr( st->mem_decim2_fx, st->mem_decim2, Q9, 3 ); + fixedToFloat_arr( st->old_wsp2_fx, st->old_wsp2, Q8, 115 ); 
+ fixedToFloat_arr( st->mem_decim2_fx, st->mem_decim2, Q8, 3 ); st->old_corr = fixedToFloat( st->old_corr_fx, Q15 ); // corr_shift_fx = floatToFixed( corr_shift, Q15 ); st->old_thres = fixedToFloat( st->old_thres_fx, Q15 ); @@ -1172,7 +1177,7 @@ ivas_error front_vad_spar( floatToFixed_arr( wsp, wsp_fx, Q9, 368 ); floatToFixed_arr( st->voicing, st->voicing_fx, Q15, 3 ); floatToFixed_arr( st->Bin_E, st->lgBin_E_fx, Q7, 128 ); - StableHighPitchDetect_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, Q12, st->lgBin_E_fx ); + StableHighPitchDetect_ivas_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, Q12, st->lgBin_E_fx ); fixedToFloat_arr( st->lgBin_E_fx, st->Bin_E, Q7, 128 ); st->LF_EnergyRatio_sm = fixedToFloat( st->LF_EnergyRatio_sm_fx, Q7 ); st->voicing_sm = fixedToFloat( st->voicing_sm_fx, Q15 ); @@ -1195,7 +1200,7 @@ ivas_error front_vad_spar( // dummy_fx = (Word16) floatToFixed( dummy, Q7 ); floatToFixed_arr( hFrontVad->hNoiseEst->old_S, hFrontVad->hNoiseEst->old_S_fx, Q7, 128 ); // loc_harm = multi_harm( st->lgBin_E_fx, hFrontVad->hNoiseEst->old_S_fx, hFrontVad->hNoiseEst->cor_map_fx, &hFrontVad->hNoiseEst->multi_harm_limit, st->total_brate, st->bwidth, ( st->hGSCEnc != NULL ) ? &st->hGSCEnc->cor_strong_limit : &dummy_int, &st->hSpMusClas->mean_avr_dyn, &st->hSpMusClas->last_sw_dyn, &cor_map_sum, &dummy, S_map ); - loc_harm = multi_harm_fx( st->lgBin_E_fx, hFrontVad->hNoiseEst->old_S_fx, hFrontVad->hNoiseEst->cor_map_fx, &hFrontVad->hNoiseEst->multi_harm_limit_fx, st->total_brate, st->bwidth, ( st->hGSCEnc != NULL ) ? 
&st->hGSCEnc->cor_strong_limit : &dummy_int, &st->hSpMusClas->mean_avr_dyn_fx, &st->hSpMusClas->last_sw_dyn_fx, &cor_map_sum_fx, &dummy_fx, S_map_fx ); + loc_harm = multi_harm_ivas_fx( st->lgBin_E_fx, hFrontVad->hNoiseEst->old_S_fx, hFrontVad->hNoiseEst->cor_map_fx, &hFrontVad->hNoiseEst->multi_harm_limit_fx, st->total_brate, st->bwidth, ( st->hGSCEnc != NULL ) ? &st->hGSCEnc->cor_strong_limit : &dummy_int, &st->hSpMusClas->mean_avr_dyn_fx, &st->hSpMusClas->last_sw_dyn_fx, &cor_map_sum_fx, &dummy_fx, S_map_fx ); fixedToFloat_arr( hFrontVad->hNoiseEst->old_S_fx, hFrontVad->hNoiseEst->old_S, Q7, 128 ); fixedToFloat_arr( hFrontVad->hNoiseEst->cor_map_fx, hFrontVad->hNoiseEst->cor_map, Q15, 128 ); // floatToFixed_arr( hFrontVad->hNoiseEst->cor_map, hFrontVad->hNoiseEst->old_S_fx, Q15, 128 ); @@ -1249,7 +1254,7 @@ ivas_error front_vad_spar( corr_shift_fx = (Word16) floatToFixed( corr_shift, Q15 ); floatToFixed_arrL( hFrontVad->hNoiseEst->bckr, hFrontVad->hNoiseEst->bckr_fx, Q_bands + QSCALE, NB_BANDS ); - noise_est_ivas_fx( st, old_pitch, tmpN_fx, epsP_h, epsP_l, Etot_fx[0], Etot_fx[0] - hFrontVad->lp_speech_fx, corr_shift_fx, tmpE_fx, fr_bands_fx[0], &cor_map_sum_fx, &sp_div_fx, &Q_sp_div, &non_staX_fx, &loc_harm, lf_E_fx[0], &hFrontVad->hNoiseEst->harm_cor_cnt, hFrontVad->hNoiseEst->Etot_l_lp_fx, hFrontVad->hNoiseEst->Etot_v_h2_fx, &hFrontVad->hNoiseEst->bg_cnt, st->lgBin_E_fx, Q_bands, e_min_scaled, &sp_floor, S_map_fx, NULL, hFrontVad, hFrontVad->ini_frame ); + noise_est_ivas_fx( st, old_pitch, tmpN_fx, epsP_h, epsP_l, Etot_fx[0], Etot_fx[0] - hFrontVad->lp_speech_fx, corr_shift_fx, tmpE_fx, fr_bands_fx[0], &cor_map_sum_fx, NULL, &sp_div_fx, &Q_sp_div, &non_staX_fx, &loc_harm, lf_E_fx[0], &hFrontVad->hNoiseEst->harm_cor_cnt, hFrontVad->hNoiseEst->Etot_l_lp_fx, hFrontVad->hNoiseEst->Etot_v_h2_fx, &hFrontVad->hNoiseEst->bg_cnt, st->lgBin_E_fx, Q_bands, e_min_scaled, &sp_floor, S_map_fx, NULL, hFrontVad, hFrontVad->ini_frame ); hFrontVad->hNoiseEst->noise_char = 
(float) fixedToFloat( hFrontVad->hNoiseEst->noise_char_fx, Q11 ); fixedToFloat_arrL( hFrontVad->hNoiseEst->fr_bands1_fx, hFrontVad->hNoiseEst->fr_bands1, Q_bands + QSCALE, NB_BANDS ); fixedToFloat_arrL( hFrontVad->hNoiseEst->fr_bands2_fx, hFrontVad->hNoiseEst->fr_bands2, Q_bands + QSCALE, NB_BANDS ); diff --git a/lib_enc/ivas_init_enc.c b/lib_enc/ivas_init_enc.c index f190545687a8b84204230dd882f6bed5df50e1aa..df29c2e7c6783670437d0fcb095c45f6bc987fdf 100644 --- a/lib_enc/ivas_init_enc.c +++ b/lib_enc/ivas_init_enc.c @@ -744,7 +744,7 @@ ivas_error ivas_init_encoder( move16(); test(); - IF( NE_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) || st_ivas->hEncoderConfig->stereo_dmx_evs ) + IF( NE_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) ) { IF( NE_32( ( error = create_sce_enc( st_ivas, sce_id, ivas_total_brate ) ), IVAS_ERR_OK ) ) { @@ -1375,7 +1375,7 @@ ivas_error ivas_init_encoder_fx( move16(); test(); - IF( EQ_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) && !st_ivas->hEncoderConfig->stereo_dmx_evs ) + IF( EQ_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) ) { IF( NE_32( ( error = create_evs_sce_enc( st_ivas, sce_id, ivas_total_brate ) ), IVAS_ERR_OK ) ) { @@ -2042,7 +2042,7 @@ void ivas_destroy_enc( { if ( st_ivas->hSCE[i] != NULL ) { - destroy_sce_enc( st_ivas->hSCE[i], ( EQ_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) && !st_ivas->hEncoderConfig->stereo_dmx_evs ) ); + destroy_sce_enc( st_ivas->hSCE[i], EQ_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) ); st_ivas->hSCE[i] = NULL; } } diff --git a/lib_enc/ivas_rom_enc.c b/lib_enc/ivas_rom_enc.c index e9a217312264bfce712e604e9b823ac62d46b15d..2f18205b0bf7e589f490786729f702750ef83772 100644 --- a/lib_enc/ivas_rom_enc.c +++ b/lib_enc/ivas_rom_enc.c @@ -43,9 +43,33 @@ /*----------------------------------------------------------------------------------* * Stereo classifiers ROM tables 
*----------------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +/* UNCLR classifier in TD stereo: list of selected features */ +const Word16 unclr_isel_td[SIZE_UNCLR_ISEL_TD] = +{ + E_corrLagStats0, E_ica_instTargetGain, E_sum_prod, E_tdm_es_em, E_m_corrL_corrR, E_d_corrL_corrR, E_corrEst0, E_corrLagMax, E_corrEstMax, E_corrEst_ncorr +}; + +/* UNCLR classifier in TD stereo:: mean & scale for feature normalization */ +const Word32 unclr_mean_td[SIZE_UNCLR_ISEL_TD] = /*Q15*/ +{ + 166277, 163748, 209465, 50557, 34744, 239251, 21, 1277892, 44, 19270 +}; + +const Word32 unclr_scale_td[SIZE_UNCLR_ISEL_TD] = /*Q15*/ +{ + 1385482, 31474, 60505, 77687, 24209, 42641, 39, 676271, 304, 11428 +}; + +/* UNCLR classifier in TD stereo: classifier parameters for logistic regression */ +const Word32 unclr_coef_td[SIZE_UNCLR_ISEL_TD] = /*Q15*/ +{ + -5875, -80055, -115720, 47164, 22570, 95476, 1290, -1332, -148091, -14371 +}; +#else /* UNCLR classifier in TD stereo: list of selected features */ -const int16_t unclr_isel_td[SIZE_UNCLR_ISEL_TD] = +const Word16 unclr_isel_td[SIZE_UNCLR_ISEL_TD] = { E_corrLagStats0, E_ica_instTargetGain, E_sum_prod, E_tdm_es_em, E_m_corrL_corrR, E_d_corrL_corrR, E_corrEst0, E_corrLagMax, E_corrEstMax, E_corrEst_ncorr }; @@ -66,6 +90,7 @@ const float unclr_coef_td[SIZE_UNCLR_ISEL_TD] = { -0.179304f, -2.443089f, -3.531498f, 1.439316f, 0.688796f, 2.913693f, 0.039382f, -0.040637f, -4.519369f, -0.438573f }; +#endif /* UNCLR classifier in DFT stereo: list of selected features */ const int16_t unclr_isel_dft[SIZE_UNCLR_ISEL_DFT] = @@ -116,7 +141,22 @@ const int16_t xtalk_isel_td[SIZE_XTALK_ISEL_TD] = { E_d_clas, E_d_voicing, E_sum_d_LSF, E_d_lepsP_13, E_d_cor_map_sum, E_d_nchar, E_d_non_sta, E_d_sp_div, E_sum_prod, E_tdm_es_em, E_m_corrL_corrR, E_corrEst0, E_corrEst_ncorr, E_corrLagStats0, E_ica_corr_value0, E_diff_corrLM_corrRM, E_tdm_LT_es_em }; +#ifdef IVAS_FLOAT_FIXED +const Word32 
xtalk_mean_td[SIZE_XTALK_ISEL_TD] = /*Q15*/ +{ + 19572, 4323, 44674958, 20928, 248554, 5077, 123099, 19205, 217973, 57391, 22648, 21, 23142, 342411, 59, 612, -71224 +}; + +const Word32 xtalk_scale_td[SIZE_XTALK_ISEL_TD] = /*Q15*/ +{ + 37220, 2553, 16147962, 18786, 191263, 13110, 139310, 20547, 45408, 60617, 17627, 43, 9123, 2070239, 88, 21549, 68968 +}; +const Word32 xtalk_coef_td[SIZE_XTALK_ISEL_TD] = /*Q15*/ +{ + 1841, 4353, 3322, -5411, 1061, 2716, -2453, 1046, 45199, -51474, -2431, -2245, 2194, -542, -135853, 99, 18138 +}; +#else const float xtalk_mean_td[SIZE_XTALK_ISEL_TD] = { 0.597295f, 0.131934f, 1363.371521f, 0.638677f, 7.585252f, 0.154930f, 3.756674f, 0.586091f, @@ -137,7 +177,7 @@ const float xtalk_coef_td[SIZE_XTALK_ISEL_TD] = 1.379376f, -1.570864f, -0.074181f, -0.068519f, 0.066952f, -0.016555f, -4.145916f, 0.003024f, 0.553536f }; - +#endif const int16_t xtalk_isel_dft[SIZE_XTALK_ISEL_DFT] = { E_clas, E_gainILD, E_gainIPD, E_angle_rot, E_g_pred, E_d_prodL_prodR, E_sum_xcorr, E_xcorr_itd_value, E_gphat_d_itd2, E_gphat_ratio_m1_m2, E_gphat_m2_m2 diff --git a/lib_enc/ivas_rom_enc.h b/lib_enc/ivas_rom_enc.h index 81758f58989d6bb5863d1378177f2e998ccba702..e3164aa116a6c3562c27ed59e56caed4ce2d74ff 100644 --- a/lib_enc/ivas_rom_enc.h +++ b/lib_enc/ivas_rom_enc.h @@ -43,15 +43,28 @@ * Stereo classifiers *----------------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +extern const Word16 unclr_isel_td[]; +extern const Word32 unclr_mean_td[]; +extern const Word32 unclr_scale_td[]; +extern const Word32 unclr_coef_td[]; +#else extern const int16_t unclr_isel_td[]; extern const float unclr_mean_td[]; extern const float unclr_scale_td[]; extern const float unclr_coef_td[]; +#endif extern const int16_t xtalk_isel_td[]; +#ifdef IVAS_FLOAT_FIXED +extern const Word32 xtalk_mean_td[]; +extern const Word32 xtalk_scale_td[]; +extern const Word32 xtalk_coef_td[]; +#else extern const float xtalk_mean_td[]; extern const float 
xtalk_scale_td[]; extern const float xtalk_coef_td[]; +#endif extern const int16_t xtalk_isel_dft[]; extern const float xtalk_mean_dft[]; diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index 5366e0957af782c3a30c07f6707eb0fc4def15ca..b87c9d058eea6528491a86fc458d026c0bebbf99 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -468,22 +468,22 @@ typedef struct stereo_td_enc_data_structure Word32 tdm_lt_corr_LM_fx; /* Long term left-mono correlation */ Word32 tdm_last_diff_lt_corr_fx; /* long term correlation difference mem */ Word16 q_tdm_last_diff_lt_corr; - Word32 tdm_last_ratio_fx; /* Last TDM ratio */ - Word32 tdm_lt_rms_L_fx; /* Left channel long term rms */ - Word32 tdm_lt_rms_R_fx; /* Right channel long term rms */ - Word32 tdm_last_ener_lt_R_fx; /* Right channel long term energy */ - Word32 tdm_last_ener_lt_L_fx; /* Left channel long term energy */ - - Word16 tdm_last_ratio_idx; /* last TDM ratio index */ - Word16 tdm_last_SM_flag; /* Flag to signal a SM encoding scheme -> better for some music item */ - Word16 tdm_ratio_transition_mov_flag; /* Flag that indicates that L-R energy is changing */ - Word16 tdm_ratio_transition_cnt; /* Counter */ - Word16 tdm_hyst_cnt; /* Counter */ - Word16 tdm_prev_stable_idx; /* Previous Transmitted ratio index*/ - Word16 tdm_prev_desired_idx; /* Previous Transmitted ratio index*/ - float tdm_LT_es_em; /* Long term evoluation of the side to mono energy ratio */ - Word32 tdm_LT_es_em_fx; /* Long term evoluation of the side to mono energy ratio */ - Word16 tdm_use_IAWB_Ave_lpc; /* Flag to indicate the usage of mean inactive LP coefficients */ + Word32 tdm_last_ratio_fx; /* Last TDM ratio */ + Word32 tdm_lt_rms_L_fx; /* Left channel long term rms */ /*Q16*/ + Word32 tdm_lt_rms_R_fx; /* Right channel long term rms */ /*Q16*/ + Word32 tdm_last_ener_lt_R_fx; /* Right channel long term energy */ /*Q16*/ + Word32 tdm_last_ener_lt_L_fx; /* Left channel long term energy */ /*Q16*/ + + Word16 
tdm_last_ratio_idx; /* last TDM ratio index */ + Word16 tdm_last_SM_flag; /* Flag to signal a SM encoding scheme -> better for some music item */ + Word16 tdm_ratio_transition_mov_flag; /* Flag that indicates that L-R energy is changing */ + Word16 tdm_ratio_transition_cnt; /* Counter */ + Word16 tdm_hyst_cnt; /* Counter */ + Word16 tdm_prev_stable_idx; /* Previous Transmitted ratio index*/ + Word16 tdm_prev_desired_idx; /* Previous Transmitted ratio index*/ + float tdm_LT_es_em; /* Long term evoluation of the side to mono energy ratio */ + Word32 tdm_LT_es_em_fx; /* Long term evoluation of the side to mono energy ratio */ /*Q21*/ + Word16 tdm_use_IAWB_Ave_lpc; /* Flag to indicate the usage of mean inactive LP coefficients */ /* NOOP parameters */ float tdm_lt_corr_RM_SM; /* Long term right-mono correlation in SM mode*/ @@ -798,9 +798,9 @@ typedef struct ivas_stereo_classifier_data_structure Word32 unclr_fv_fx[SSC_MAX_NFEA]; /* UNCLR - feature vector */ // Q15 Word32 xtalk_score_buf_fx[XTALK_SCORE_BUF_LEN]; // Q31 Word32 xtalk_fv_fx[SSC_MAX_NFEA]; /* xtalk - feature vector */ // Q15 - Word32 xtalk_wscore_fx; - Word32 xtalk_score_fx; // Q31 - Word32 xtalk_score_wrelE_fx; + Word32 xtalk_wscore_fx; // Q31 + Word32 xtalk_score_fx; // Q31 + Word32 xtalk_score_wrelE_fx; // Q31 Word32 is_speech_fx; } STEREO_CLASSIF_DATA, *STEREO_CLASSIF_HANDLE; diff --git a/lib_enc/ivas_stereo_classifier.c b/lib_enc/ivas_stereo_classifier.c index 9eddc2fc43279147d02608b6be3cb13c79763341..5621385e78c9e896d115c7e34b6051fc0bca02d1 100644 --- a/lib_enc/ivas_stereo_classifier.c +++ b/lib_enc/ivas_stereo_classifier.c @@ -48,20 +48,27 @@ * Local constants *-------------------------------------------------------------------*/ -#define RC_FACT_UP 0.3f -#define RC_FACT_UP_Q31 644245094 -#define RC_FACT_DOWN 0.7f -#define RC_FACT_DOWN_Q31 1503238554 -#define UNCLR_SCORE_THR 4.0f -#define XTALK_SCORE_THR_DFT 4.0f -#define XTALK_SCORE_THR_DFT_Q27 ( 1 << 29 ) -#define XTALK_SCORE_THR_TD_UP 3.0f 
-#define XTALK_SCORE_THR_TD_DN 4.0f +#define RC_FACT_UP 0.3f +#define RC_FACT_UP_Q31 644245094 +#define RC_FACT_DOWN 0.7f +#define RC_FACT_DOWN_Q31 1503238554 +#define UNCLR_SCORE_THR 4.0f +#define UNCLR_SCORE_THR_Q28 ( 1073741824 ) +#define XTALK_SCORE_THR_DFT 4.0f +#define XTALK_SCORE_THR_DFT_Q27 ( 1 << 29 ) +#define XTALK_SCORE_THR_TD_UP 3.0f +#define XTALK_SCORE_THR_TD_UP_Q28 ( 805306368 ) +#define ONE_BY_XTALK_SCORE_THR_TD_UP_Q31 ( 715827882 ) +#define XTALK_SCORE_THR_TD_DN 4.0f +#define XTALK_SCORE_THR_TD_DN_Q28 ( 1073741824 ) +#define ONE_BY_XTALK_SCORE_THR_TD_DN_Q31 ( 536870912 ) #define UNCLR_INTERCEPT_TD 0.780313f +#define UNCLR_INTERCEPT_TD_Q28 ( 209463676 ) #define UNCLR_INTERCEPT_DFT 1.226513f #define UNCLR_INTERCEPT_DFT_Q30 1316958306 #define XTALK_INTERCEPT_TD -1.770983f +#define XTALK_INTERCEPT_TD_Q28 ( -475394629 ) #define XTALK_INTERCEPT_DFT -0.758556f #define XTALK_INTERCEPT_DFT_Q31 -1628986606 #define XTALK_INTERCEPT_DFT_Q27 -101811663 @@ -75,11 +82,16 @@ /*-------------------------------------------------------------------* * Local function prototypes *-------------------------------------------------------------------*/ - +#ifdef IVAS_FLOAT_FIXED static void rc_filter_fx( const Word32 x, Word32 *y, const Word16 order, const Word32 tau ); +#else static void rc_filter( const float x, float *y, const int16_t order, const float tau ); - +#endif +#ifdef IVAS_FLOAT_FIXED +static void edge_detect( const Word32 *inp, const Word16 len, const Word32 inp_min, const Word32 inp_max, Word16 *edge_str, Word16 *edge_type ); +#else static void edge_detect( const float *inp, const int16_t len, const float inp_min, const float inp_max, float *edge_str, int16_t *edge_type ); +#endif #ifdef IVAS_FLOAT_FIXED static Word32 redge_detect_fx( const Word32 *inp, const Word16 len, const Word32 inp_min, const Word32 inp_max, Word16 *edge_min_e ); @@ -318,12 +330,20 @@ void stereo_classifier_init_fx( move32(); hStereoClassif->dE1_ch1_fx = 0; move32(); + 
hStereoClassif->dE1_ch1_e = 31; + move16(); hStereoClassif->dE1_ch2_fx = 0; move32(); + hStereoClassif->dE1_ch2_e = 31; + move16(); hStereoClassif->nchar_ch1_fx = 0; move32(); + hStereoClassif->nchar_ch1_e = 31; + move16(); hStereoClassif->nchar_ch2_fx = 0; move32(); + hStereoClassif->nchar_ch2_e = 31; + move16(); hStereoClassif->non_sta_ch1_fx = 0; move32(); hStereoClassif->sp_div_ch1_fx = 0; @@ -1062,7 +1082,92 @@ void stereo_classifier_features( * * Classify current TD frame as uncorrelated L/R (1) or normal (0) *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +void unclr_classifier_td_fx( + CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure; reads the hStereoClassif features, updates unclr_wscore_fx and unclr_decision */ +) +{ + Word16 i, ind, exp; + Word32 relE_ST, score, fvn; + Word16 edge, edge_0_1, score_exp; + STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif; + + /* calculate raw score based on LR; score is carried as mantissa/exponent and starts from the intercept */ + exp = 0; + move16(); + score = UNCLR_INTERCEPT_TD_Q28; + move32(); + score_exp = 3; /* Q28 mantissa <=> exponent 3 */ + move16(); + FOR( i = 0; i < SIZE_UNCLR_ISEL_TD; i++ ) + { + ind = unclr_isel_td[i]; + move16(); + + /* mean & std removal */ + fvn = BASOP_Util_Divide3232_Scale_cadence( L_sub( hStereoClassif->unclr_fv_fx[ind], unclr_mean_td[i] ), unclr_scale_td[i], &exp ); + fvn = Mpy_32_32( fvn, unclr_coef_td[i] ); // Q = 31-exp+15-31 = 15-exp + exp = add( exp, 16 ); // exp = 31-(15-exp) = 16+exp + + /* LR */ + score = BASOP_Util_Add_Mant32Exp( score, score_exp, fvn, exp, &score_exp ); + } + + score = L_shl_sat( score, sub( score_exp, 3 ) ); // Q28 + + + /* normalize score to -1:+1 */ + IF( GT_32( score, UNCLR_SCORE_THR_Q28 ) ) + { + score = UNCLR_SCORE_THR_Q28; + move32(); + } + ELSE IF( LT_32( score, -UNCLR_SCORE_THR_Q28 ) ) + { + score = -UNCLR_SCORE_THR_Q28; + move32(); + } + /*score /= 2 * UNCLR_SCORE_THR; = score = score / 8 + no shift is applied: the clamped Q28 value is simply re-read as Q31, which is the division by 8*/ + + /* weight raw score with relative energy */ + score = Mpy_32_32( score, hStereoClassif->relE_0_1_fx ); // Q31 + + /* rising
edge detection on relE */ + relE_ST = Mean32( hStereoClassif->relE_buf_fx, UNCLR_L_RELE ); // Q31 + IF( GT_32( hStereoClassif->relE_0_1_fx, relE_ST ) ) + { + rc_filter_fx( hStereoClassif->relE_0_1_fx, hStereoClassif->unclr_relE_0_1_LT_fx, UNCLR_RC_ORDER, RC_FACT_UP_Q31 ); + } + ELSE + { + rc_filter_fx( hStereoClassif->relE_0_1_fx, hStereoClassif->unclr_relE_0_1_LT_fx, UNCLR_RC_ORDER, RC_FACT_DOWN_Q31 ); + } + + edge = extract_h( L_sub( hStereoClassif->relE_0_1_fx, hStereoClassif->unclr_relE_0_1_LT_fx[UNCLR_RC_ORDER - 1] ) ); // Q15 + edge_0_1 = lin_interp_ivas_fx( edge, 0, 31129 /*0.95 in Q15*/, MAX_16, 29491 /*0.9 in Q15*/, 1 ); // Q15 + + /* LT average: wscore = edge_0_1 * wscore + ( 1 - edge_0_1 ) * score */ + hStereoClassif->unclr_wscore_fx = Madd_32_16( Mpy_32_16_1( hStereoClassif->unclr_wscore_fx, edge_0_1 ), score, sub( MAX_16, edge_0_1 ) ); // Q31 + move32(); + + /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC): 0 -> 1 above +0.1, 1 -> 0 below -0.07 */ + test(); + test(); + test(); + test(); + test(); + if ( ( ( hStereoClassif->unclr_decision == 0 && GT_32( hStereoClassif->unclr_wscore_fx, 214748365 /*0.1f in Q31*/ ) ) || ( EQ_16( hStereoClassif->unclr_decision, 1 ) && LT_32( hStereoClassif->unclr_wscore_fx, -150323855 /*-0.07f in Q31*/ ) ) ) && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 || hStereoClassif->unclr_sw_enable_cnt[1] > 0 ) ) + { + /* let's switch the binary decision */ + hStereoClassif->unclr_decision = !hStereoClassif->unclr_decision; + move16(); + } + + return; +} +#else void unclr_classifier_td( CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ ) @@ -1129,7 +1234,7 @@ void unclr_classifier_td( return; } - +#endif /*-------------------------------------------------------------------* * Function unclr_classifier_dft() @@ -1310,7 +1415,149 @@ void unclr_classifier_dft( * * Classify current TD frame as cross-talk frame (1) or normal stereo frame (0) *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +void xtalk_classifier_td_fx( + CPE_ENC_HANDLE hCPE /*
i/o: CPE encoder structure */ +) +{ + Word16 i, ind, edge_type, exp, score_exp; + Word32 score, fvn, scr_min, scr_max; + Word16 edge, edge_0_1, wedge, wrelE; + STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif; + + /* calculate raw score based on LR; score is carried as mantissa/exponent and starts from the intercept */ + exp = 0; + move16(); + score = XTALK_INTERCEPT_TD_Q28; + move32(); + score_exp = 3; /* Q28 mantissa <=> exponent 3 */ + move16(); + FOR( i = 0; i < SIZE_XTALK_ISEL_TD; i++ ) + { + ind = xtalk_isel_td[i]; + move16(); + + /* mean & std removal */ + fvn = BASOP_Util_Divide3232_Scale_cadence( L_sub( hStereoClassif->xtalk_fv_fx[ind], xtalk_mean_td[i] ), xtalk_scale_td[i], &exp ); + fvn = Mpy_32_32( fvn, xtalk_coef_td[i] ); // Q = 31-exp+15-31 = 15-exp + exp = add( exp, 16 ); // exp = 31-(15-exp) = 16+exp + + /* LR */ + score = BASOP_Util_Add_Mant32Exp( score, score_exp, fvn, exp, &score_exp ); + } + + score = L_shl_sat( score, sub( score_exp, 3 ) ); // Q28 + + /* normalize raw score to -1:+1 (positive side scaled by 1/3, negative side by 1/4) */ + IF( GT_32( score, XTALK_SCORE_THR_TD_UP_Q28 ) ) + { + score = MAX_32; // Q31 + move32(); + } + ELSE IF( LT_32( score, -XTALK_SCORE_THR_TD_DN_Q28 ) ) + { + score = MIN_32; // Q31 + move32(); + } + ELSE IF( GT_32( score, 0 ) ) + { + score = Mpy_32_32( score, ONE_BY_XTALK_SCORE_THR_TD_UP_Q31 ); // Q28 + score = L_shl( score, 3 ); // Q31 + } + ELSE + { + score = Mpy_32_32( score, ONE_BY_XTALK_SCORE_THR_TD_DN_Q31 ); // Q28 + score = L_shl( score, 3 ); // Q31 + } + + IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_DFT ) ) + { + /* overwrite score if we have just switched from DFT stereo */ + score = hStereoClassif->xtalk_score_fx; // Q31 + move32(); + } + ELSE + { + hStereoClassif->xtalk_score_fx = score; // Q31 + move32(); + } + + if ( !hStereoClassif->vad_flag_glob ) + { + /* reset score to 0 in inactive segments */ + score = 0; + move32(); + } + + + /* weight raw score with relative energy */ + wrelE = lin_interp_ivas_fx( extract_h( hStereoClassif->relE_0_1_fx ), 16384 /*0.5f Q15*/, 31129 /*0.95f Q15*/, 29491 /*0.9f Q15*/, 0, 1 ); +
hStereoClassif->xtalk_score_wrelE_fx = Madd_32_16( Mpy_32_16_1( hStereoClassif->xtalk_score_wrelE_fx, wrelE ), score, sub( MAX_16, wrelE ) ); // Q31 + move32(); + score = hStereoClassif->xtalk_score_wrelE_fx; // Q31 + move32(); + + /* rising edge detector on raw score -> yields 1 if strong rising edge is detected in the raw score buffer */ + Copy32( &hStereoClassif->xtalk_score_buf_fx[0], &hStereoClassif->xtalk_score_buf_fx[1], XTALK_SCORE_BUF_LEN - 1 ); + hStereoClassif->xtalk_score_buf_fx[0] = score; // Q31, newest sample is stored at index 0 + move32(); + + minimum_l( hStereoClassif->xtalk_score_buf_fx, XTALK_SCORE_BUF_LEN, &scr_min ); + maximum_l( hStereoClassif->xtalk_score_buf_fx, XTALK_SCORE_BUF_LEN, &scr_max ); + + test(); + test(); + IF( ( scr_min < 0 && GT_32( scr_max, 429496730 /*0.2f in Q31*/ ) ) || GT_32( L_sub_sat( scr_max, scr_min ), 1073741824 /*0.5f Q31*/ ) ) + { + /* test rising edge (use 0 as edge_type because of newer->older buffer samples ordering) */ + edge_type = 0; + move16(); + edge_detect( hStereoClassif->xtalk_score_buf_fx, XTALK_SCORE_BUF_LEN, -53687091 /*-0.2f Q28*/, ONE_IN_Q28, &edge, &edge_type ); + + test(); + IF( edge_type == 0 && LT_16( edge, 9830 /*0.3f Q15*/ ) ) + { + /* normalize edge to 0-1 interval */ + edge_0_1 = lin_interp_ivas_fx( sub( MAX_16, edge ), MAX_16, MAX_16, 19661 /*0.6f Q15*/, 0, 1 ); + } + ELSE + { + edge_0_1 = 0; + move16(); + } + } + ELSE + { + edge_0_1 = 0; + move16(); + } + + /* weight raw score based on rising edge detector */ + wedge = lin_interp_ivas_fx( edge_0_1, 0, 29491 /*0.9f Q15*/, MAX_16, 16384 /*0.5f Q15 */, 1 ); + + hStereoClassif->xtalk_wscore_fx = Madd_32_16( Mpy_32_16_1( hStereoClassif->xtalk_wscore_fx, wedge ), score, sub( MAX_16, wedge ) ); // Q31 + move32(); + + if ( !hStereoClassif->vad_flag_glob ) + { + hStereoClassif->xtalk_wscore_fx = 0; /* Word32 field, hence move32() */ + move32(); + } + + /* binary decision w.
hysteresis (switch the decision only when coder_type is GC, UC or IC) */ + test(); + test(); + if ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->xtalk_decision == 0 && GT_32( hStereoClassif->xtalk_wscore_fx, 64424509 /*0.03f Q31*/ ) ) /*|| (hStereoClassif->xtalk_decision == 1 && hStereoClassif->xtalk_wscore < 0.00f)*/ && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 || hStereoClassif->unclr_sw_enable_cnt[1] > 0 ) ) + { + /* let's switch the binary decision */ + hStereoClassif->xtalk_decision = !hStereoClassif->xtalk_decision; + move16(); + } + + return; +} +#else void xtalk_classifier_td( CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ ) @@ -1425,7 +1672,7 @@ void xtalk_classifier_td( return; } - +#endif /*-------------------------------------------------------------------* * Function xtalk_classifier_dft() @@ -1832,7 +2079,7 @@ static void rc_filter_fx( return; } -#endif +#else static void rc_filter( const float x, float *y, @@ -1849,7 +2096,7 @@ static void rc_filter( return; } - +#endif /*-------------------------------------------------------------------* * Function edge_detect() @@ -1859,7 +2106,143 @@ static void rc_filter( * Set edge_type to 0/1/2 when calling this function to specify the edge type you want to detect.
The returned value will be modified * according to the edge type detected (-1 indicates that no edge has been detected) *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +static void edge_detect( + const Word32 *inp, /* i : input buffer Q31*/ + const Word16 len, /* i : length of the input buffer Q0*/ + const Word32 inp_min, /* i : minimum value for edge detection Q28*/ + const Word32 inp_max, /* i : maximum value for edge detection Q28*/ + Word16 *edge_str, /* o : edge strength (from 0 to Inf), i.e. the smallest mean squared fit error found Q15*/ + Word16 *edge_type /* i/o: edge type (to be) detected: 0 = falling, 1 = rising, 2 = both; on return 0/1 = type found, -1 = none */ +) +{ + Word16 i, j, et; + Word32 y, err, edge_slope, edge[EDGE_MAX_LEN]; + Word32 edge_min, err0, L_tmp; + et = -1; + move16(); + edge_min = ONE_IN_Q25; /* an edge is reported only if its mean squared error is below this start value */ + move32(); + + test(); + IF( *edge_type == 0 || EQ_16( *edge_type, 2 ) ) + { + /* falling edge detection: fit a ramp from inp_max down to inp_min over samples 0..i */ + set_zero_fx( edge, EDGE_MAX_LEN ); + + /* set error at 0th index (inp is Q31, bounds are Q28, hence the shift by 3) */ + IF( GT_32( L_shr( inp[0], 3 ), inp_max ) ) + { + err0 = 0; + move32(); + } + ELSE + { + /* inhibits edge smearing effect */ + /* err0 = powf( inp[0] - inp_max, 2 ); */ + L_tmp = L_sub( L_shr( inp[0], 3 ), inp_max ); // Q28 + err0 = Mpy_32_32( L_tmp, L_tmp ); // Q25 + } + + /* test edges on intervals from 2 to len */ + FOR( i = 1; i < len; i++ ) + { + IF( EQ_16( i, 1 ) ) + { + edge_slope = L_sub( inp_max, inp_min ); // Q28 + } + ELSE + { + edge_slope = Mpy_32_16_1( L_sub( inp_max, inp_min ), divide1616( 1, i ) ); // Q28 + } + edge[i] = err0; // Q25 + move32(); + FOR( j = 1; j <= i; j++ ) + { + y = L_sub( inp_max, imult3216( edge_slope, j ) ); // Q28 + err = L_sub( y, check_bounds_l( L_shr( inp[j], 3 ), inp_min, inp_max ) ); // Q28 + edge[i] = Madd_32_32( edge[i], err, err ); // Q25 + move32(); + } + + edge[i] = Mpy_32_16_1( edge[i], divide1616( 1, add( i, 1 ) ) ); // Q25, mean over the i+1 accumulated terms + move32(); + + IF( LT_32( edge[i], edge_min ) ) + { + edge_min = edge[i]; // Q25 + move32(); + et = 0; + move16(); + } + } + } + +
test(); + IF( EQ_16( *edge_type, 1 ) || EQ_16( *edge_type, 2 ) ) + { + /* rising edge detection: fit a ramp from inp_min up to inp_max over samples 0..i */ + set_zero_fx( edge, EDGE_MAX_LEN ); + + /* set error at 0th index (inp is Q31, bounds are Q28, hence the shift by 3) */ + IF( LT_32( L_shr( inp[0], 3 ), inp_min ) ) + { + err0 = 0; + move32(); + } + ELSE + { + /* inhibits edge smearing effect */ + /*err0 = powf( inp[0] - inp_min, 2 );*/ + L_tmp = L_sub( L_shr( inp[0], 3 ), inp_min ); // Q28 + err0 = Mpy_32_32( L_tmp, L_tmp ); // Q25 + } + + /* test edges on intervals from 2 to len; i stops at len-1 so that edge[i] and inp[j] stay inside their len-sized ranges */ + FOR( i = 1; i < len; i++ ) + { + IF( EQ_16( i, 1 ) ) + { + edge_slope = L_sub( inp_max, inp_min ); // Q28 + } + ELSE + { + edge_slope = Mpy_32_16_1( L_sub( inp_max, inp_min ), divide1616( 1, i ) ); // Q28 + } + + edge[i] = err0; // Q25 + move32(); + + FOR( j = 1; j <= i; j++ ) + { + y = L_add( inp_min, imult3216( edge_slope, j ) ); // Q28 + err = L_sub( y, check_bounds_l( L_shr( inp[j], 3 ), inp_min, inp_max ) ); // Q28 + edge[i] = Madd_32_32( edge[i], err, err ); // Q25 + move32(); + } + + edge[i] = Mpy_32_16_1( edge[i], divide1616( 1, add( i, 1 ) ) ); // Q25, mean over the i+1 accumulated terms + move32(); + + IF( LT_32( edge[i], edge_min ) ) + { + edge_min = edge[i]; // Q25 + move32(); + et = 1; + move16(); + } + } + } + + *edge_str = extract_l( L_shr( edge_min, 10 ) ); // Q15 + move16(); + *edge_type = et; // Q0 + move16(); + + return; +} +#else static void edge_detect( const float *inp, /* i : input buffer */ const int16_t len, /* i : length of the input buffer */ @@ -1957,7 +2340,7 @@ static void edge_detect( return; } - +#endif /*-------------------------------------------------------------------* * Function redge_detect() diff --git a/lib_enc/ivas_stereo_cng_enc.c b/lib_enc/ivas_stereo_cng_enc.c index 42f8e8c2fd65eaeab44c808a1434b27362e58bf7..30fd5303be753135d696e39bb219a7f50d7f8312 100644 --- a/lib_enc/ivas_stereo_cng_enc.c +++ b/lib_enc/ivas_stereo_cng_enc.c @@ -867,7 +867,7 @@ void stereo_dft_cng_side_gain_fx( Word32 sg_average_fx_q31; FOR( b = 0; b < hStereoDft->nbands; b++ ) { - sg_average_fx_q31 = L_shl(
hStereoCng->sg_average_fx[b], 5 ); // Q31 + sg_average_fx_q31 = L_shl_sat( hStereoCng->sg_average_fx[b], 5 ); // Q31 stereo_dft_quantize_res_gains_fx( &sg_average_fx_q31, NULL, NULL, NULL, hStereoDft->side_gain_index_EC + b, NULL ); } } diff --git a/lib_enc/ivas_stereo_td_analysis.c b/lib_enc/ivas_stereo_td_analysis.c index 8a3be7590dd8b4486df38e115fe907704f286248..4345236c9a7a0a6fc5a813980253d48613d0b69d 100644 --- a/lib_enc/ivas_stereo_td_analysis.c +++ b/lib_enc/ivas_stereo_td_analysis.c @@ -62,6 +62,11 @@ #define RMS_MIN 1500 /* Minimum energy for ratio index*/ #define RMS_MIN2 1000 /* Minimum energy for LR encoding*/ #define CORR_THRES 0.95f /* Maximal open loop correlation */ +#ifdef IVAS_FLOAT_FIXED +#define RMS_MIN_Q16 98304000 /* 1500 in Q16 */ /* Minimum energy for ratio index*/ +#define RMS_MIN2_Q16 65536000 /* 1000 in Q16 */ /* Minimum energy for LR encoding*/ +#define CORR_THRES_Q15 31130 /* Maximal open loop correlation */ +#endif #ifndef IVAS_FLOAT_FIXED #define DT_ENER_THR 200 /* Energy variation threshold */ #endif @@ -115,7 +120,10 @@ #define PG2ND2 3.0f #define EUCLDST2 0.08f -#define RMS_THR 100 +#define RMS_THR 100 +#ifdef IVAS_FLOAT_FIXED +#define RMS_THR_Q16 ( 6553600 ) +#endif #define RATIO_PG_LRTD 0.96f #define IVAS_BRATE_OMASA_STEREO_SW_THR 15000 @@ -142,39 +150,639 @@ static Word16 limit_idx_Dwnmix_fx( const Word16 idx_in, const Word16 unclr_decis static Word16 limit_idx_NoDwnmix_fx( const Word16 idx_in, const Word16 side_can_change, const Word32 d_lt_corr_raw, const Word16 q_d_lt_corr_raw ); -static void Get_LR_rms_fx( const Word16 *Left_in, const Word16 *Right_in, const Word16 input_frame, Word32 *rms_L, Word16 *q_rms_L, Word32 *rms_R, Word16 *q_rms_R ); +static void Get_LR_rms_fx( const Word16 *Left_in, const Word16 *Right_in, const Word16 input_frame, Word32 *rms_L, Word16 *q_rms_L, Word32 *rms_R, Word16 *q_rms_R ); + +static Word16 Get_dt_lt_ener_fx( CPE_ENC_HANDLE hCPE, const Word16 IsSideMono, const Word16 input_frame, const Word16 
tdm_last_SM_flag, const Word32 rms_L, const Word16 q_rms_L, const Word32 rms_R, const Word16 q_rms_R, Word32 *tdm_lt_rms_L, Word32 *tdm_lt_rms_R, Word32 *tdm_last_ener_lt_L, Word32 *tdm_last_ener_lt_R, Word32 *tdm_LT_es_em, Word16 *tdm_hyst_cnt, Word16 *tdm_NOOP_SM_flag_loc, Word32 *ener_R_dt, Word32 *ener_L_dt, Word32 *corr_LM, Word16 *q_corr_LM, Word32 *corr_RM, Word16 *q_corr_RM ); + +static void NOOP_decision_fx( CPE_ENC_HANDLE hCPE, const Word16 tdm_NOOP_flag_loc, const Word16 tmp_SM_flag, const Word32 rms_L, const Word16 q_rms_L, const Word32 rms_R, const Word16 q_rms_R, Word16 *tdm_SM_flag_loc ); + +static Word32 Comp_diff_lt_corr_fx( CPE_ENC_HANDLE hCPE, const Word16 IsSideMono, const Word32 rms_L, const Word16 q_rms_L, const Word32 rms_R, const Word16 q_rms_R, const Word32 ener_L_dt, const Word32 ener_R_dt, Word32 corr_LM, Word16 q_corr_LM, Word32 corr_RM, Word16 q_corr_RM, const Word32 tdm_lt_rms_L, const Word32 tdm_lt_rms_R, Word32 *tdm_lt_corr_LM, Word32 *tdm_lt_corr_RM, Word32 *tdm_last_diff_lt_corr, Word16 *q_tdm_last_diff_lt_corr, Word32 *inst_ratio_L_out, Word32 *diff_lt_corr, Word16 *q_d_lt_corr_raw ); +#else +static int16_t stereo_tdm_ener_analysis_SM( CPE_ENC_HANDLE hCPE, Encoder_State **sts, const int16_t input_frame, int16_t *tdm_SM_flag ); + +static void Get_corr_n( const float L[], const float R[], float *ic_Lm, float *ic_Rm, const int16_t len, float *es_em, const int16_t tdm_SM_calc_flag ); + +static int16_t stereo_smooth_LR_transition( int16_t *tdm_prev_stable_idx, int16_t *tdm_ratio_transition_mov_flag, int16_t tdm_last_ratio_idx, int16_t *tdm_prev_desired_idx, int16_t *tdm_ratio_transition_cnt, const int16_t tdm_SM_flag, int16_t desired_idx ); + +static int16_t limit_idx_Dwnmix( const int16_t idx_in, const int16_t unclr_decision, const int16_t inst_idx, const int16_t previous_idx, const int16_t tdm_last_LRTD_PriCh_cnt, const int16_t tdm_last_LRTD_frame_cnt ); + +static int16_t limit_idx_NoDwnmix( const int16_t idx_in, const int16_t 
side_can_change, const float d_lt_corr_raw ); + +static void Get_LR_rms( const float *Left_in, const float *Right_in, const int16_t input_frame, float *rms_L, float *rms_R ); + +static int16_t Get_dt_lt_ener( CPE_ENC_HANDLE hCPE, const int16_t IsSideMono, const int16_t input_frame, const int16_t tdm_last_SM_flag, const float rms_L, const float rms_R, float *tdm_lt_rms_L, float *tdm_lt_rms_R, float *tdm_last_ener_lt_L, float *tdm_last_ener_lt_R, float *tdm_LT_es_em, int16_t *tdm_hyst_cnt, int16_t *tdm_NOOP_SM_flag_loc, float *ener_R_dt, float *ener_L_dt, float *corr_LM, float *corr_RM ); + +static void NOOP_decision( CPE_ENC_HANDLE hCPE, const int16_t tdm_NOOP_flag_loc, const int16_t tmp_SM_flag, const float rms_L, const float rms_R, int16_t *tdm_SM_flag_loc ); + +static float Comp_diff_lt_corr( CPE_ENC_HANDLE hCPE, const int16_t IsSideMono, const float rms_L, const float rms_R, const float ener_L_dt, const float ener_R_dt, float corr_LM, float corr_RM, const float tdm_lt_rms_L, const float tdm_lt_rms_R, float *tdm_lt_corr_LM, float *tdm_lt_corr_RM, float *tdm_last_diff_lt_corr, float *inst_ratio_L_out, float *diff_lt_corr ); +#endif + + +/*-------------------------------------------------------------------* + * Function stereo_tdm_ener_analysis() + * + *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +Word16 stereo_tdm_ener_analysis_fx( + const Word16 ivas_format, /* i : IVAS format */ + CPE_ENC_HANDLE hCPE, /* i : CPE structure */ + const Word16 input_frame, /* i : Number of samples */ + Word16 *tdm_SM_or_LRTD_Pri, /* o : channel combination scheme flag in TD stereo OR LRTD primary channel */ + Word16 *tdm_ratio_idx_SM /* o : TDM ratio index for SM mode */ +) +{ + Word32 diff_lt_corr_fx /*Q24*/; + Word32 ratio_L_fx, dist_fx; + Word16 i, side_can_change; + Word16 idx, tdm_SM_flag_loc; + Word16 tmp_SM_flag; + Word16 desired_idx; + Word32 rms_thd_fx; // Q16 + Word16 tdm_NOOP_flag_loc, tdm_NOOP_flag; + 
STEREO_TD_ENC_DATA_HANDLE hStereoTD; + Encoder_State **sts; + Word32 d_lt_corr_raw_fx; + Word16 q_d_lt_corr_raw; + Word32 inst_ratio_L_fx; // Q24 + Word16 tdm_LRTD_pri_side; + Word32 rms_L_fx, rms_R_fx; + Word16 q_rms_L, q_rms_R; + Word32 corr_RM_fx, corr_LM_fx; + Word16 q_corr_LM, q_corr_RM; + Word32 ener_R_dt_fx, ener_L_dt_fx; + + hStereoTD = hCPE->hStereoTD; + sts = hCPE->hCoreCoder; + + desired_idx = 0; + move16(); + inst_ratio_L_fx = 0; + move32(); + diff_lt_corr_fx = 0; + move32(); + /*----------------------------------------------------------------* + * Compute L and R energy and Long term RMS of each channel + *----------------------------------------------------------------*/ + + Get_LR_rms_fx( sts[0]->input_fx, sts[1]->input_fx, input_frame, &rms_L_fx, &q_rms_L, &rms_R_fx, &q_rms_R ); + + /*----------------------------------------------------------------* + * Compute the 1st order energy difference difference + * Compute the gain of L&R channel compared to mono + * - estimate the long term evolution of the L to Mono gain + * - estimate the long term evolution of the R to Mono gain + * - estimate the long term difference between the long term + * - evolution of the L and R to Mono gain + *----------------------------------------------------------------*/ + + tdm_SM_flag_loc = hStereoTD->tdm_last_SM_flag; + move16(); + + tmp_SM_flag = Get_dt_lt_ener_fx( hCPE, 0, input_frame, hStereoTD->tdm_last_SM_flag, rms_L_fx, q_rms_L, rms_R_fx, q_rms_R, + &hStereoTD->tdm_lt_rms_L_fx, &hStereoTD->tdm_lt_rms_R_fx, &hStereoTD->tdm_last_ener_lt_L_fx, &hStereoTD->tdm_last_ener_lt_R_fx, + &hStereoTD->tdm_LT_es_em_fx, &hStereoTD->tdm_hyst_cnt, &tdm_NOOP_flag_loc, + &ener_R_dt_fx, &ener_L_dt_fx, &corr_LM_fx, &q_corr_LM, &corr_RM_fx, &q_corr_RM ); + + hStereoTD->tdm_SM_reset_flag = 0; + move16(); + + /*----------------------------------------------------------------* + * Check if the signal has Near Out Of Phase characteristics + * and trigger side/mono configuration if needed + 
*----------------------------------------------------------------*/ + + NOOP_decision_fx( hCPE, tdm_NOOP_flag_loc, tmp_SM_flag, rms_L_fx, q_rms_L, rms_R_fx, q_rms_R, &tdm_SM_flag_loc ); + + /*----------------------------------------------------------------* + * Adjust stereo downmixing adaptation rate factor + * in function of the signal energy. If signal energy is low, + * adaptation rate factor is lower. This prevent stereo image + * move on speech offset + *----------------------------------------------------------------*/ + d_lt_corr_raw_fx = Comp_diff_lt_corr_fx( hCPE, 0, rms_L_fx, q_rms_L, rms_R_fx, q_rms_R, ener_L_dt_fx, ener_R_dt_fx, corr_LM_fx, q_corr_LM, corr_RM_fx, q_corr_RM, hStereoTD->tdm_lt_rms_L_fx, hStereoTD->tdm_lt_rms_R_fx, &hStereoTD->tdm_lt_corr_LM_fx, + &hStereoTD->tdm_lt_corr_RM_fx, &hStereoTD->tdm_last_diff_lt_corr_fx, &hStereoTD->q_tdm_last_diff_lt_corr, &inst_ratio_L_fx, &diff_lt_corr_fx, &q_d_lt_corr_raw ); + + IF( GT_16( q_d_lt_corr_raw, 31 ) ) + { + d_lt_corr_raw_fx = L_shr( d_lt_corr_raw_fx, sub( q_d_lt_corr_raw, Q31 ) ); + q_d_lt_corr_raw = Q31; + move16(); + } + + /*----------------------------------------------------------------* + * UNCLR classifier (detection of uncorrelated L and R channels) + * Xtalk classifier (detection of cross-talk L and R channels) + *----------------------------------------------------------------*/ + + unclr_classifier_td_fx( hCPE ); + xtalk_classifier_td_fx( hCPE ); + + /* switch to LRTD on cross-talk segments where two speakers are weakly correlated */ + hStereoTD->prev_fr_LRTD_TD_dec = hCPE->hStereoClassif->lrtd_mode; + move16(); + + /*----------------------------------------------------------------* + * When the energies of channels are low enough, compute the ratio + * of L and R needed to create new mono/side signals + *----------------------------------------------------------------*/ + + IF( EQ_16( ivas_format, MASA_ISM_FORMAT ) ) + { + test(); + test(); + if ( ( EQ_16( 
hCPE->hStereoClassif->lrtd_mode, 1 ) || EQ_16( hCPE->hStereoTD->prev_fr_LRTD_TD_dec, 1 ) ) && ( LE_32( L_add( L_sub( hCPE->element_brate, 50 * FRAMES_PER_SEC ), L_add( hCPE->brate_surplus, hCPE->brate_surplus ) ), IVAS_BRATE_OMASA_STEREO_SW_THR ) ) ) + { + hStereoTD->prev_fr_LRTD_TD_dec = 0; + move16(); + } + } + + rms_thd_fx = RMS_MIN_Q16; + move32(); + IF( EQ_16( hCPE->hStereoClassif->lrtd_mode, 1 ) ) + { + rms_thd_fx = L_shr( rms_thd_fx, 2 ); /*Q16*/ /*rms_thd_fx *= 0.25f*/ + test(); + test(); + IF( LE_32( hStereoTD->tdm_lt_rms_L_fx, 4915200 /* 75 in Q16*/ ) || LE_32( hStereoTD->tdm_lt_rms_R_fx, 75 /* 75 in Q16*/ ) /*|| sts[0]->last_coder_type == TRANSITION */ ) + { + rms_thd_fx = L_shr( rms_thd_fx, 5 ); /* Q16*/ /*rms_thd_fx *= 0.03125f*/ + } + ELSE IF( GE_16( sts[0]->hVAD->hangover_cnt, 8 ) && GE_16( sts[1]->hVAD->hangover_cnt, 8 ) ) + { + rms_thd_fx = imult3216( rms_thd_fx, 5 /*1/0.2f*/ ); /*Q16*/ /*rms_thd_fx /= 0.2f*/ + } + + /* Overwrite the LR decision flag in case the signals is already considered as S/M or when the signal is very similar between left and right channel */ + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + IF( EQ_16( tdm_SM_flag_loc, 1 ) ) + { + hStereoTD->prev_fr_LRTD_TD_dec = 0; + move16(); + } + ELSE IF( EQ_16( hStereoTD->tdm_LRTD_flag, 1 ) && GT_16( hStereoTD->tdm_FD2LRTD_SW_cnt, 10 ) && + ( hCPE->hStereoClassif->vad_flag_glob == 0 || ( hCPE->hStereoClassif->unclr_decision == 0 && ( LT_32( hCPE->hStereoClassif->xtalk_score_fx, -1717986918 /* -0.8f in Q31*/ ) || LT_32( hCPE->hStereoClassif->xtalk_wscore_fx, -279172874 /*-0.13f in Q31*/ ) ) ) || + ( EQ_16( hCPE->hStereoClassif->unclr_decision, 1 ) && sts[0]->last_clas == UNVOICED_CLAS && sts[1]->last_clas == UNVOICED_CLAS && LT_32( L_abs( hCPE->hStereoClassif->unclr_wscore_fx ), 10737418 /* 0.005f in Q31 */ ) ) ) ) + { + /* This forces the LRTD to switch to TD when 
inactive content happens on both channel */ + hStereoTD->prev_fr_LRTD_TD_dec = 0; + move16(); + } + ELSE IF( hStereoTD->tdm_LRTD_flag == 0 && + ( hCPE->hStereoClassif->vad_flag_glob == 0 || ( hCPE->hStereoClassif->unclr_decision == 0 && ( hCPE->hStereoClassif->xtalk_score_fx <= 0 || LE_32( hCPE->hStereoClassif->xtalk_wscore_fx, 214748364 /*0.1f in Q31 */ ) ) ) || + ( EQ_16( hCPE->hStereoClassif->unclr_decision, 1 ) && ( sts[0]->last_clas == UNVOICED_CLAS && sts[1]->last_clas == UNVOICED_CLAS ) && LT_32( L_abs( hCPE->hStereoClassif->unclr_wscore_fx ), 53687091 /*0.025f in Q31 */ ) ) + /* (sts[0]->last_clas == UNVOICED_CLAS && sts[1]->last_clas == UNVOICED_CLAS && hCPE->hStereoClassif->xtalk_wscore <= 0.0f)*/ ) ) + { + /* This forces the LRTD to switch to TD when inactive content happens on both channel */ + hStereoTD->prev_fr_LRTD_TD_dec = 0; + move16(); + } + } + + side_can_change = 0; + move16(); + + /* update LRTD->DFT stereo hangover counters */ + IF( EQ_16( hStereoTD->prev_fr_LRTD_TD_dec, 1 ) ) + { + hStereoTD->tdm_last_LRTD_frame_cnt = 0; + move16(); + } + ELSE + { + hStereoTD->tdm_last_LRTD_frame_cnt = add( hStereoTD->tdm_last_LRTD_frame_cnt, 1 ); + move16(); + hStereoTD->tdm_last_LRTD_frame_cnt = s_min( hStereoTD->tdm_last_LRTD_frame_cnt, 100 ); + move16(); + } + + if ( NE_16( hCPE->last_element_mode, IVAS_CPE_TD ) ) + { + side_can_change = 1; + move16(); + } + + test(); + IF( EQ_16( hStereoTD->prev_fr_LRTD_TD_dec, 1 ) && side_can_change == 0 ) + { + test(); + test(); + test(); + test(); + IF( ( LE_32( hStereoTD->tdm_lt_rms_L_fx, rms_thd_fx ) && LE_32( hStereoTD->tdm_lt_rms_R_fx, L_shl( rms_thd_fx, 1 ) ) ) || + ( LE_32( hStereoTD->tdm_lt_rms_R_fx, rms_thd_fx ) && LE_32( hStereoTD->tdm_lt_rms_L_fx, L_shl( rms_thd_fx, 1 ) ) ) || + ( sts[0]->hVAD->hangover_cnt != 0 && LT_16( sts[1]->hNoiseEst->Etot_last_fx, 3072 /*12 in Q8*/ ) ) || + ( sts[1]->hVAD->hangover_cnt != 0 && LT_16( sts[0]->hNoiseEst->Etot_last_fx, 3072 /*12 in Q8*/ ) ) || + ( NE_16( 
sts[0]->hSpMusClas->past_dec[0], sts[1]->hSpMusClas->past_dec[0] ) ) ) + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + if ( ( ( LT_32( rms_L_fx, RMS_THR_Q16 ) && LT_32( rms_R_fx, RMS_THR_Q16 ) && LT_32( L_abs( L_sub( rms_R_fx, rms_L_fx ) ), RMS_THR_Q16 / 2 ) && GT_32( L_abs( d_lt_corr_raw_fx ), L_shr( 644245094 /*0.3f in Q31*/, sub( Q31, q_d_lt_corr_raw ) ) ) ) || + ( LT_16( abs_s( sub( sts[0]->old_corr_fx, sts[1]->old_corr_fx ) ), 4915 /*0.15f in Q15*/ ) && GT_16( sts[0]->old_corr_fx, 22937 /*0.7f in Q15*/ ) && LT_16( sts[0]->old_corr_fx, 27853 /*0.85f in Q15*/ ) && LT_32( L_abs( L_sub( rms_L_fx, rms_R_fx ) ), rms_thd_fx ) && GT_32( L_abs( d_lt_corr_raw_fx ), L_shr( 644245094 /*0.3f in Q31*/, sub( Q31, q_d_lt_corr_raw ) ) ) ) ) /* Both channels are low energy, clean background switching is allowed */ + ) + { + side_can_change = 1; + move16(); + } + } + } + ELSE IF( side_can_change == 0 ) /*if( hStereoTD->prev_fr_LRTD_TD_dec == 0 )*/ + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + IF( ( ( LT_16( sts[0]->old_corr_fx, CORR_THRES_Q15 ) && LT_16( sts[1]->old_corr_fx, CORR_THRES_Q15 ) ) || ( LE_32( hStereoTD->tdm_lt_rms_L_fx, RMS_MIN2_Q16 ) && LE_32( hStereoTD->tdm_lt_rms_R_fx, RMS_MIN2_Q16 ) ) ) && + ( ( ( LE_32( hStereoTD->tdm_lt_rms_L_fx, rms_thd_fx ) && LE_32( hStereoTD->tdm_lt_rms_R_fx, L_shl( rms_thd_fx, 1 ) ) ) || ( LE_32( hStereoTD->tdm_lt_rms_R_fx, rms_thd_fx ) && LE_32( hStereoTD->tdm_lt_rms_L_fx, L_shl( rms_thd_fx, 1 ) ) ) ) || + ( EQ_16( hCPE->hStereoClassif->lrtd_mode, 1 ) && ( sts[0]->tdm_LRTD_flag == 0 || ( EQ_16( sts[0]->tdm_LRTD_flag, 1 ) && ( ( LT_32( rms_L_fx, L_shl( rms_thd_fx, 1 ) ) && LT_32( rms_R_fx, L_shl( rms_thd_fx, 1 ) ) ) || ( NE_16( sts[0]->hSpMusClas->past_dec[0], sts[1]->hSpMusClas->past_dec[0] ) ) ) ) ) ) /* Even if the UNCLR is set to 1, the content should be encoded with TD, lower swichting requierment */ + ) ) + { + 
side_can_change = 1; + move16(); + } + } + + test(); + test(); + if ( GE_32( hCPE->hStereoClassif->xtalk_wscore_fx, 107374182 /*0.05f in Q31*/ ) && hStereoTD->prev_fr_LRTD_TD_dec == 0 && EQ_16( hCPE->hStereoClassif->lrtd_mode, 1 ) ) + { + side_can_change = 0; + move16(); + } + test(); + test(); + IF( EQ_16( hCPE->last_element_mode, IVAS_CPE_MDCT ) || EQ_16( hStereoTD->flag_skip_DMX, 1 ) ) + { + desired_idx = LRTD_STEREO_LEFT_IS_PRIM; + move16(); + hStereoTD->tdm_prev_desired_idx = LRTD_STEREO_LEFT_IS_PRIM; + move16(); + ratio_L_fx = ONE_IN_Q31; /*Q31*/ + move32(); + hStereoTD->tdm_prev_stable_idx = LRTD_STEREO_LEFT_IS_PRIM; + move16(); + hStereoTD->tdm_ratio_transition_mov_flag = 0; + move16(); + hStereoTD->tdm_last_ratio_idx = LRTD_STEREO_LEFT_IS_PRIM; + move16(); + hStereoTD->tdm_ratio_transition_cnt = 0; + move16(); + idx = TDM_NQ; /* Reserved quantizer index for special case */ + move16(); + } + ELSE IF( side_can_change || LE_16( sts[1]->ini_frame, 1 ) ) + { + ratio_L_fx = L_max( diff_lt_corr_fx, -RATIO_MAX_FX_Q24 ); // Q24 + ratio_L_fx = L_min( ratio_L_fx, RATIO_MAX_FX_Q24 ); // Q24 + ratio_L_fx = Madd_32_32( ONE_IN_Q24, 1432371593 /*0.667f in Q31*/, ratio_L_fx ); // Q24 + + test(); + test(); + IF( EQ_16( hCPE->hStereoClassif->lrtd_mode, 1 ) && ( NE_16( hCPE->last_element_mode, IVAS_CPE_TD ) || LT_16( hStereoTD->tdm_FD2LRTD_SW_cnt, 4 ) ) ) + { + ratio_L_fx = hCPE->hStereoTD->tdm_last_ratio_fx; // Q31 /* note: the last_ratio is set in before in stereo_set_tdm() */ + move32(); + } + ELSE + { + /*ratio_L = ( 1.0f - cosf( EVS_PI * ratio_L / 2.0f ) ) / 2.0f;*/ + ratio_L_fx = L_deposit_h( sub( ONE_IN_Q14, getCosWord16( extract_l( Mpy_32_32( 1647099 /* EVS_PI/2 in Q20 */, ratio_L_fx ) ) ) ) ); // Q31 (Q14 + Q1(division by 2.0f) + Q16) + } + + test(); + test(); + test(); + IF( EQ_16( hStereoTD->tdm_LRTD_flag, 1 ) || ( EQ_16( hCPE->hStereoClassif->lrtd_mode, 1 ) && ( hCPE->hStereoClassif->prev_lrtd_mode == 0 || GT_16( abs_s( sub( hCPE->hStereoTCA->indx_ica_gD, 20 ) ), 
2 ) ) ) ) + { + IF( GE_32( ratio_L_fx, 1138166333 /*0.53f in Q31*/ ) ) /* small hysteresis is used to prevent undesired switching during inactive segment */ + { + desired_idx = LRTD_STEREO_LEFT_IS_PRIM - 1; + move16(); + } + ELSE IF( LT_32( ratio_L_fx, 1009317315 /*0.47f in Q31*/ ) ) + { + desired_idx = LRTD_STEREO_RIGHT_IS_PRIM + 1; + move16(); + } + ELSE IF( GT_32( L_sub( rms_L_fx, rms_R_fx ), 655360 /*10 in Q16*/ ) ) + { + desired_idx = LRTD_STEREO_LEFT_IS_PRIM - 1; + move16(); + } + ELSE + { + desired_idx = LRTD_STEREO_RIGHT_IS_PRIM + 1; + move16(); + } + + test(); + test(); + IF( NE_16( desired_idx, hStereoTD->tdm_prev_desired_idx ) && EQ_16( hStereoTD->tdm_last_LRTD_frame_cnt, 1 ) && LE_16( sts[0]->last_coder_type, UNVOICED ) ) /* TD transtionning to FD, we don't want an inversion of channels on the first transition frame */ + { + desired_idx = hStereoTD->tdm_prev_desired_idx; + move16(); + } + ELSE + { + hStereoTD->tdm_prev_desired_idx = desired_idx; + move16(); + } + idx = desired_idx; + move16(); + } + ELSE + { + test(); + test(); + if ( GE_32( hCPE->element_brate, IVAS_48k ) && sts[0]->hVAD->hangover_cnt != 0 && LT_32( L_max( hStereoTD->tdm_lt_rms_L_fx, hStereoTD->tdm_lt_rms_R_fx ), 33554432 /* 512.0f */ ) ) + { + ratio_L_fx = check_bounds_l( ratio_L_fx, 644245094 /*0.3f in Q31*/, 1503238554 /*0.7f in Q31*/ ); + } + + test(); + test(); + test(); + test(); + IF( ( GT_32( hCPE->hStereoTCA->instTargetGain_fx, 644245094 /*1.2f in Q29*/ ) || GT_32( hCPE->hStereoTCA->targetGain_fx, ONE_IN_Q29 ) ) && LT_32( ratio_L_fx, 858993459 /*0.4f*/ ) ) + { + ratio_L_fx = 858993459; /*0.4f in Q31*/ + move32(); + } + ELSE IF( ( LT_32( hCPE->hStereoTCA->instTargetGain_fx, 429496730 /*0.8f*/ ) || LT_32( hCPE->hStereoTCA->targetGain_fx, ONE_IN_Q29 ) ) && GT_32( ratio_L_fx, 1288490189 /*0.6f in Q31*/ ) ) + { + ratio_L_fx = 1288490189; /* 0.6f in Q31 */ + move32(); + } + + dist_fx = L_abs( L_sub( ratio_L_fx, tdm_ratio_tabl_fx[0] ) ); // Q31 + + desired_idx = 0; + move16(); + FOR( 
i = 1; i < TDM_NQ; i++ ) + { + IF( LE_32( L_abs( L_sub( ratio_L_fx, tdm_ratio_tabl_fx[i] ) ), dist_fx ) ) + { + dist_fx = L_abs( L_sub( ratio_L_fx, tdm_ratio_tabl_fx[i] ) ); // Q31 + desired_idx = i; + move16(); + } + } + + idx = stereo_smooth_LR_transition_fx( &hStereoTD->tdm_prev_stable_idx, &hStereoTD->tdm_ratio_transition_mov_flag, hStereoTD->tdm_last_ratio_idx, &hStereoTD->tdm_prev_desired_idx, &hStereoTD->tdm_ratio_transition_cnt, tdm_SM_flag_loc, desired_idx ); + + /* Change the switching level in case of dual mono (in case the scenario still accept left right switching */ + /* This logic is needed in case the content is exactly the same in the 2 channel and it is expected to get back to LRTD, to prevent the secondary channel to be completely empty */ + IF( EQ_16( hCPE->hStereoClassif->lrtd_mode, 1 ) ) + { + IF( LE_16( idx, LRTD_STEREO_MID_IS_PRIM ) ) + { + idx = s_min( idx, LRTD_STEREO_MID_IS_PRIM - 1 ); + } + ELSE + { + idx = s_max( idx, LRTD_STEREO_MID_IS_PRIM + 1 ); + } + + hStereoTD->tdm_prev_desired_idx = idx; + move16(); + } + /* 0 and 30 are reserved to signal L-R only coding */ + } + } + ELSE + { + idx = hStereoTD->tdm_last_ratio_idx; + move16(); + } + + hStereoTD->tdm_inst_ratio_idx = LRTD_STEREO_RIGHT_IS_PRIM; + move16(); + tdm_LRTD_pri_side = -1; + move16(); + IF( LT_16( hStereoTD->tdm_FD2LRTD_SW_cnt, 5 ) ) + { + desired_idx = 15; + move16(); + } + ELSE + { + desired_idx = 0; + move16(); + dist_fx = L_abs( L_sub( inst_ratio_L_fx, L_shr( tdm_ratio_tabl_fx[0], 7 ) ) ); // Q24 + + FOR( i = 1; i < TDM_NQ; i++ ) + { + IF( LE_32( L_abs( L_sub( inst_ratio_L_fx, L_shr( tdm_ratio_tabl_fx[i], 7 ) ) ), dist_fx ) ) + { + dist_fx = L_abs( L_sub( inst_ratio_L_fx, L_shr( tdm_ratio_tabl_fx[i], 7 ) ) ); // Q24 + desired_idx = i; + move16(); + } + } + } + + IF( LT_16( sub( sts[1]->lp_speech_fx, sts[1]->lp_noise_fx ), 12800 /*50.0f*/ ) ) /* likely presence of noisy content */ + { + /* pointing in the right direction, inverse it else do nothing */ + test(); + 
test(); + test(); + test(); + IF( ( GT_16( idx, LRTD_STEREO_MID_IS_PRIM ) && GT_16( desired_idx, LRTD_STEREO_MID_IS_PRIM ) ) || ( LT_16( idx, LRTD_STEREO_MID_IS_PRIM ) && LT_16( desired_idx, LRTD_STEREO_MID_IS_PRIM ) ) ) + { + Word16 idx_offet; + idx_offet = 5; + move16(); + if ( GT_16( desired_idx, LRTD_STEREO_MID_IS_PRIM ) ) /* slightly Favor the 2nd channel */ + { + idx_offet = negate( idx_offet ); + } + desired_idx = add( desired_idx, idx_offet ); + desired_idx = check_bounds_s_fx( desired_idx, 0, 30 ); + } + } -static Word16 Get_dt_lt_ener_fx( CPE_ENC_HANDLE hCPE, const Word16 IsSideMono, const Word16 input_frame, const Word16 tdm_last_SM_flag, const Word32 rms_L, const Word16 q_rms_L, const Word32 rms_R, const Word16 q_rms_R, Word32 *tdm_lt_rms_L, Word32 *tdm_lt_rms_R, Word32 *tdm_last_ener_lt_L, Word32 *tdm_last_ener_lt_R, Word32 *tdm_LT_es_em, Word16 *tdm_hyst_cnt, Word16 *tdm_NOOP_SM_flag_loc, Word32 *ener_R_dt, Word32 *ener_L_dt, Word32 *corr_LM, Word16 *q_corr_LM, Word32 *corr_RM, Word16 *q_corr_RM ); + test(); + IF( sts[1]->clas != UNVOICED_CLAS || sts[0]->clas != UNVOICED_CLAS ) + { + desired_idx = check_bounds_s_fx( desired_idx, 5, 25 ); + } -static void NOOP_decision_fx( CPE_ENC_HANDLE hCPE, const Word16 tdm_NOOP_flag_loc, const Word16 tmp_SM_flag, const Word32 rms_L, const Word16 q_rms_L, const Word32 rms_R, const Word16 q_rms_R, Word16 *tdm_SM_flag_loc ); + hStereoTD->tdm_inst_ratio_idx = desired_idx; + move16(); + IF( /*hCPE->last_element_mode == IVAS_CPE_MDCT ||*/ EQ_16( hStereoTD->flag_skip_DMX, 1 ) ) + { + /*force tdm_inst_ratio_idx to the reserved index */ + hStereoTD->tdm_inst_ratio_idx = idx; + move16(); + tdm_LRTD_pri_side = 1; /* left channel */ + move16(); + } + ELSE IF( EQ_16( hStereoTD->tdm_LRTD_flag, 1 ) ) + { + idx = limit_idx_NoDwnmix_fx( idx, side_can_change, d_lt_corr_raw_fx, q_d_lt_corr_raw ); -static Word32 Comp_diff_lt_corr_fx( CPE_ENC_HANDLE hCPE, const Word16 IsSideMono, const Word32 rms_L, const Word16 q_rms_L, const Word32 
rms_R, const Word16 q_rms_R, const Word32 ener_L_dt, const Word32 ener_R_dt, Word32 corr_LM, Word16 q_corr_LM, Word32 corr_RM, Word16 q_corr_RM, const Word32 tdm_lt_rms_L, const Word32 tdm_lt_rms_R, Word32 *tdm_lt_corr_LM, Word32 *tdm_lt_corr_RM, Word32 *tdm_last_diff_lt_corr, Word16 *q_tdm_last_diff_lt_corr, Word32 *inst_ratio_L_out, Word32 *diff_lt_corr, Word16 *q_d_lt_corr_raw ); -#else -static int16_t stereo_tdm_ener_analysis_SM( CPE_ENC_HANDLE hCPE, Encoder_State **sts, const int16_t input_frame, int16_t *tdm_SM_flag ); + hStereoTD->tdm_prev_stable_idx = LRTD_STEREO_LEFT_IS_PRIM; + move16(); + tdm_LRTD_pri_side = 0; /* right channel */ + move16(); + IF( idx != LRTD_STEREO_RIGHT_IS_PRIM ) + { + tdm_LRTD_pri_side = 1; /* left channel */ + move16(); + hStereoTD->tdm_prev_stable_idx = LRTD_STEREO_RIGHT_IS_PRIM; + move16(); + } + } + ELSE + { + test(); + test(); + idx = limit_idx_Dwnmix_fx( idx, ( hCPE->hStereoClassif->unclr_decision || ( EQ_16( sts[0]->flag_noisy_speech_snr, 1 ) && GT_32( hCPE->hStereoClassif->xtalk_wscore_fx, 214748365 /*0.1f*/ ) ) ), desired_idx, hStereoTD->tdm_last_ratio_idx, hStereoTD->tdm_last_LRTD_PriCh_cnt, hStereoTD->tdm_last_LRTD_frame_cnt ); + } -static void Get_corr_n( const float L[], const float R[], float *ic_Lm, float *ic_Rm, const int16_t len, float *es_em, const int16_t tdm_SM_calc_flag ); + IF( GT_16( abs_s( sub( hStereoTD->tdm_last_ratio_idx, idx ) ), LRTD_STEREO_MID_IS_PRIM ) ) + { + hStereoTD->tdm_last_LRTD_PriCh_cnt = 0; + move16(); + } + ELSE + { + hStereoTD->tdm_last_LRTD_PriCh_cnt = add( hStereoTD->tdm_last_LRTD_PriCh_cnt, 1 ); + move16(); + } + ratio_L_fx = tdm_ratio_tabl_fx[idx]; // Q31 + move32(); -static int16_t stereo_smooth_LR_transition( int16_t *tdm_prev_stable_idx, int16_t *tdm_ratio_transition_mov_flag, int16_t tdm_last_ratio_idx, int16_t *tdm_prev_desired_idx, int16_t *tdm_ratio_transition_cnt, const int16_t tdm_SM_flag, int16_t desired_idx ); + test(); + IF( EQ_16( hStereoTD->tdm_SM_modi_flag, 1 ) && 
hStereoTD->tdm_LRTD_flag == 0 ) + { + idx = shr( add( hStereoTD->tdm_last_ratio_idx, add( LRTD_STEREO_MID_IS_PRIM, 1 ) ), 1 ); + ratio_L_fx = tdm_ratio_tabl_fx[idx]; // Q31 + move32(); + } -static int16_t limit_idx_Dwnmix( const int16_t idx_in, const int16_t unclr_decision, const int16_t inst_idx, const int16_t previous_idx, const int16_t tdm_last_LRTD_PriCh_cnt, const int16_t tdm_last_LRTD_frame_cnt ); + test(); + test(); + test(); + IF( ( EQ_16( hStereoTD->tdm_ratio_transition_mov_flag, 1 ) && GE_16( hStereoTD->tdm_ratio_transition_cnt, 31 ) ) || ( ( EQ_16( hStereoTD->tdm_last_SM_flag, tdm_SM_flag_loc ) ) && ( EQ_16( idx, hStereoTD->tdm_prev_stable_idx ) ) ) ) + { + hStereoTD->tdm_ratio_transition_cnt = 0; + move16(); + hStereoTD->tdm_ratio_transition_mov_flag = 0; + move16(); + } -static int16_t limit_idx_NoDwnmix( const int16_t idx_in, const int16_t side_can_change, const float d_lt_corr_raw ); + test(); + if ( hStereoTD->tdm_ratio_transition_mov_flag == 0 || tdm_SM_flag_loc == 0 ) + { + hStereoTD->tdm_prev_stable_idx = idx; + move16(); + } -static void Get_LR_rms( const float *Left_in, const float *Right_in, const int16_t input_frame, float *rms_L, float *rms_R ); + /* NOOP ratio calculation */ + IF( tdm_SM_flag_loc ) + { + IF( hStereoTD->tdm_SM_reset_flag ) + { + hStereoTD->tdm_lt_corr_RM_SM_fx = 167772 /*0.01f Q24*/; + move32(); + hStereoTD->tdm_lt_corr_LM_SM_fx = 167772 /*0.01f Q24*/; + move32(); + hStereoTD->tdm_last_ratio_SM_fx = hStereoTD->tdm_last_ratio_fx; + move32(); + hStereoTD->tdm_last_ratio_idx_SM = hStereoTD->tdm_last_ratio_idx; + move16(); + hStereoTD->tdm_lt_rms_L_SM_fx = 2621440; // 40.0f Q16 + move32(); + hStereoTD->tdm_lt_rms_R_SM_fx = 2621440; // 40.0f Q16 + move32(); + hStereoTD->tdm_last_diff_lt_corr_SM_fx = 0; + move32(); + hStereoTD->q_tdm_last_diff_lt_corr_SM = 0; + move16(); + hStereoTD->tdm_last_ener_lt_R_SM_fx = 0; + move32(); + hStereoTD->tdm_last_ener_lt_L_SM_fx = 0; + move32(); -static int16_t Get_dt_lt_ener( CPE_ENC_HANDLE hCPE, 
const int16_t IsSideMono, const int16_t input_frame, const int16_t tdm_last_SM_flag, const float rms_L, const float rms_R, float *tdm_lt_rms_L, float *tdm_lt_rms_R, float *tdm_last_ener_lt_L, float *tdm_last_ener_lt_R, float *tdm_LT_es_em, int16_t *tdm_hyst_cnt, int16_t *tdm_NOOP_SM_flag_loc, float *ener_R_dt, float *ener_L_dt, float *corr_LM, float *corr_RM ); + hStereoTD->tdm_noop_mov_flag = 0; + move16(); + hStereoTD->tdm_noop_cnt = 0; + move16(); + hStereoTD->tdm_last_SM_flag_noop = 0; + move16(); + hStereoTD->tdm_prev_stable_idx_SM = 0; + move16(); + hStereoTD->tdm_prev_desired_idx_SM = 0; + move16(); + hStereoTD->tdm_LT_es_em_SM_fx = 209715; // 0.1f Q21; + move32(); + hStereoTD->tdm_hyst_cnt_SM = 0; + move16(); + } -static void NOOP_decision( CPE_ENC_HANDLE hCPE, const int16_t tdm_NOOP_flag_loc, const int16_t tmp_SM_flag, const float rms_L, const float rms_R, int16_t *tdm_SM_flag_loc ); + *tdm_ratio_idx_SM = stereo_tdm_ener_analysis_SM_fx( hCPE, sts, input_frame, &tdm_NOOP_flag ); + move16(); + } + ELSE + { + *tdm_ratio_idx_SM = LRTD_STEREO_MID_IS_PRIM; + move16(); + tdm_NOOP_flag = 1; + move16(); + } -static float Comp_diff_lt_corr( CPE_ENC_HANDLE hCPE, const int16_t IsSideMono, const float rms_L, const float rms_R, const float ener_L_dt, const float ener_R_dt, float corr_LM, float corr_RM, const float tdm_lt_rms_L, const float tdm_lt_rms_R, float *tdm_lt_corr_LM, float *tdm_lt_corr_RM, float *tdm_last_diff_lt_corr, float *inst_ratio_L_out, float *diff_lt_corr ); -#endif + sts[0]->tdm_LRTD_flag = hStereoTD->tdm_LRTD_flag; + move16(); + sts[1]->tdm_LRTD_flag = hStereoTD->tdm_LRTD_flag; + move16(); + /* set channel combination scheme flag */ + *tdm_SM_or_LRTD_Pri = tdm_SM_flag_loc; + move16(); + if ( EQ_16( hCPE->hStereoTD->tdm_LRTD_flag, 1 ) ) + { + *tdm_SM_or_LRTD_Pri = tdm_LRTD_pri_side; + move16(); + } -/*-------------------------------------------------------------------* - * Function stereo_tdm_ener_analysis() - * - 
*-------------------------------------------------------------------*/ + hCPE->hStereoClassif->ratio_L_fx = ratio_L_fx; + move32(); + return idx; +} +#else int16_t stereo_tdm_ener_analysis( const int16_t ivas_format, /* i : IVAS format */ CPE_ENC_HANDLE hCPE, /* i : CPE structure */ @@ -184,93 +792,33 @@ int16_t stereo_tdm_ener_analysis( ) { float rms_R, rms_L; -#ifdef IVAS_FLOAT_FIXED - float diff_lt_corr = 0, ratio_L, dist; -#else float corr_RM, corr_LM, diff_lt_corr = 0, ratio_L, dist; -#endif -#ifdef IVAS_FLOAT_FIXED - Word32 diff_lt_corr_fx; -#endif int16_t i, side_can_change; int16_t idx, tdm_SM_flag_loc; int16_t tmp_SM_flag; -#ifndef IVAS_FLOAT_FIXED float ener_R_dt, ener_L_dt; -#endif int16_t desired_idx; float rms_thd; int16_t tdm_NOOP_flag_loc, tdm_NOOP_flag; STEREO_TD_ENC_DATA_HANDLE hStereoTD; Encoder_State **sts; -#ifndef IVAS_FLOAT_FIXED const float *Left_in, *Right_in; -#endif float d_lt_corr_raw; float inst_ratio_L = 0; -#ifdef IVAS_FLOAT_FIXED - Word32 d_lt_corr_raw_fx; - Word16 q_d_lt_corr_raw; - Word32 inst_ratio_L_fx; -#endif int16_t tdm_LRTD_pri_side; -#ifdef IVAS_FLOAT_FIXED - Word32 rms_L_fx, rms_R_fx; - Word16 q_rms_L, q_rms_R; - Word32 corr_RM_fx, corr_LM_fx; - Word16 q_corr_LM, q_corr_RM; - Word32 ener_R_dt_fx, ener_L_dt_fx; -#if 0 - Word16 Left_in_fx[L_FRAME48k], Right_in_fx[L_FRAME48k]; - Word16 q_Left_in, q_Right_in; -#endif -#endif hStereoTD = hCPE->hStereoTD; sts = hCPE->hCoreCoder; -#ifndef IVAS_FLOAT_FIXED Left_in = sts[0]->input; /* Left channel */ Right_in = sts[1]->input; /* Right channel */ -#endif desired_idx = 0; -#ifdef IVAS_FLOAT_FIXED - inst_ratio_L_fx = 0; - move32(); - diff_lt_corr_fx = 0; - move32(); -#endif /*----------------------------------------------------------------* * Compute L and R energy and Long term RMS of each channel *----------------------------------------------------------------*/ -#ifdef IVAS_FLOAT_FIXED - floatToFixed_arr16( sts[0]->input, sts[0]->input_fx, 0, input_frame ); - floatToFixed_arr16( 
sts[1]->input, sts[1]->input_fx, 0, input_frame ); -#endif - -#ifdef IVAS_FLOAT_FIXED -#if 1 - Get_LR_rms_fx( sts[0]->input_fx, sts[1]->input_fx, input_frame, &rms_L_fx, &q_rms_L, &rms_R_fx, &q_rms_R ); -#else - /* This part has f2f conversions as sts[0]->input_fx and sts[1]->input_fx are in Q0. Precision loss is observed in later functions.*/ - q_Left_in = Q_factor_arr( sts[0]->input, input_frame ); - q_Right_in = Q_factor_arr( sts[1]->input, input_frame ); - q_Left_in = s_min( q_Left_in, q_Right_in ); - q_Left_in = sub( q_Left_in, Q1 ); - floatToFixed_arr16( sts[0]->input, Left_in_fx, q_Left_in, input_frame ); - floatToFixed_arr16( sts[1]->input, Right_in_fx, q_Left_in, input_frame ); - Get_LR_rms_fx( Left_in_fx, Right_in_fx, input_frame, &rms_L_fx, &q_rms_L, &rms_R_fx, &q_rms_R ); -#endif -#else Get_LR_rms( Left_in, Right_in, input_frame, &rms_L, &rms_R ); -#endif - -#ifdef IVAS_FLOAT_FIXED - rms_L = fixedToFloat_32( rms_L_fx, q_rms_L ); - rms_R = fixedToFloat_32( rms_R_fx, q_rms_R ); -#endif /*----------------------------------------------------------------* * Compute the 1st order energy difference difference @@ -283,39 +831,8 @@ int16_t stereo_tdm_ener_analysis( tdm_SM_flag_loc = hStereoTD->tdm_last_SM_flag; -#ifdef IVAS_FLOAT_FIXED - hStereoTD->tdm_lt_rms_L_fx = floatToFixed_32( hStereoTD->tdm_lt_rms_L, Q16 ); - hStereoTD->tdm_lt_rms_R_fx = floatToFixed_32( hStereoTD->tdm_lt_rms_R, Q16 ); - hStereoTD->tdm_last_ener_lt_L_fx = floatToFixed_32( hStereoTD->tdm_last_ener_lt_L, Q16 ); - hStereoTD->tdm_last_ener_lt_R_fx = floatToFixed_32( hStereoTD->tdm_last_ener_lt_R, Q16 ); - hStereoTD->tdm_LT_es_em_fx = floatToFixed_32( hStereoTD->tdm_LT_es_em, Q21 ); - sts[0]->hNoiseEst->Etot_last_fx = float_to_fix16( sts[0]->hNoiseEst->Etot_last, Q8 ); - sts[1]->hNoiseEst->Etot_last_fx = float_to_fix16( sts[1]->hNoiseEst->Etot_last, Q8 ); - sts[0]->old_corr_fx = float_to_fix16( sts[0]->old_corr, Q15 ); - sts[1]->old_corr_fx = float_to_fix16( sts[1]->old_corr, Q15 ); -#endif - 
-#ifdef IVAS_FLOAT_FIXED - tmp_SM_flag = Get_dt_lt_ener_fx( hCPE, 0, input_frame, hStereoTD->tdm_last_SM_flag, rms_L_fx, q_rms_L, rms_R_fx, q_rms_R, - &hStereoTD->tdm_lt_rms_L_fx, &hStereoTD->tdm_lt_rms_R_fx, &hStereoTD->tdm_last_ener_lt_L_fx, &hStereoTD->tdm_last_ener_lt_R_fx, - &hStereoTD->tdm_LT_es_em_fx, &hStereoTD->tdm_hyst_cnt, &tdm_NOOP_flag_loc, - &ener_R_dt_fx, &ener_L_dt_fx, &corr_LM_fx, &q_corr_LM, &corr_RM_fx, &q_corr_RM ); -#else tmp_SM_flag = Get_dt_lt_ener( hCPE, 0, input_frame, hStereoTD->tdm_last_SM_flag, rms_L, rms_R, &hStereoTD->tdm_lt_rms_L, &hStereoTD->tdm_lt_rms_R, &hStereoTD->tdm_last_ener_lt_L, &hStereoTD->tdm_last_ener_lt_R, &hStereoTD->tdm_LT_es_em, &hStereoTD->tdm_hyst_cnt, &tdm_NOOP_flag_loc, &ener_R_dt, &ener_L_dt, &corr_LM, &corr_RM ); -#endif - -#ifdef IVAS_FLOAT_FIXED - hStereoTD->tdm_lt_rms_L = fixedToFloat_32( hStereoTD->tdm_lt_rms_L_fx, Q16 ); - hStereoTD->tdm_lt_rms_R = fixedToFloat_32( hStereoTD->tdm_lt_rms_R_fx, Q16 ); - hStereoTD->tdm_last_ener_lt_L = fixedToFloat_32( hStereoTD->tdm_last_ener_lt_L_fx, Q16 ); - hStereoTD->tdm_last_ener_lt_R = fixedToFloat_32( hStereoTD->tdm_last_ener_lt_R_fx, Q16 ); - hStereoTD->tdm_LT_es_em = fixedToFloat_32( hStereoTD->tdm_LT_es_em_fx, Q21 ); - sts[0]->hNoiseEst->Etot_last = fix16_to_float( sts[0]->hNoiseEst->Etot_last_fx, Q8 ); - sts[1]->hNoiseEst->Etot_last = fix16_to_float( sts[1]->hNoiseEst->Etot_last_fx, Q8 ); - hCPE->hStereoClassif->xtalk_fv[E_diff_corrLM_corrRM] = fixedToFloat( hCPE->hStereoClassif->xtalk_fv_fx[E_diff_corrLM_corrRM], Q21 ); - hCPE->hStereoClassif->xtalk_fv[E_tdm_LT_es_em] = fixedToFloat( hCPE->hStereoClassif->xtalk_fv_fx[E_tdm_LT_es_em], Q21 ); -#endif hStereoTD->tdm_SM_reset_flag = 0; @@ -324,17 +841,7 @@ int16_t stereo_tdm_ener_analysis( * and trigger side/mono configuration if needed *----------------------------------------------------------------*/ -#ifdef IVAS_FLOAT_FIXED - sts[0]->ee_old_fx = floatToFixed( sts[0]->ee_old, Q6 ); - sts[1]->ee_old_fx = 
floatToFixed( sts[1]->ee_old, Q6 ); - hStereoTD->tdm_last_ratio_fx = floatToFixed( hStereoTD->tdm_last_ratio, Q31 ); -#endif - -#ifdef IVAS_FLOAT_FIXED - NOOP_decision_fx( hCPE, tdm_NOOP_flag_loc, tmp_SM_flag, rms_L_fx, q_rms_L, rms_R_fx, q_rms_R, &tdm_SM_flag_loc ); -#else NOOP_decision( hCPE, tdm_NOOP_flag_loc, tmp_SM_flag, rms_L, rms_R, &tdm_SM_flag_loc ); -#endif /*----------------------------------------------------------------* * Adjust stereo downmixing adaptation rate factor @@ -343,29 +850,8 @@ int16_t stereo_tdm_ener_analysis( * move on speech offset *----------------------------------------------------------------*/ -#ifdef IVAS_FLOAT_FIXED - hStereoTD->tdm_lt_corr_LM_fx = floatToFixed_32( hStereoTD->tdm_lt_corr_LM, Q24 ); - hStereoTD->tdm_lt_corr_RM_fx = floatToFixed_32( hStereoTD->tdm_lt_corr_RM, Q24 ); - hStereoTD->q_tdm_last_diff_lt_corr = Q31; - hStereoTD->tdm_last_diff_lt_corr_fx = floatToFixed_32( hStereoTD->tdm_last_diff_lt_corr, hStereoTD->q_tdm_last_diff_lt_corr ); -#endif - -#ifdef IVAS_FLOAT_FIXED - d_lt_corr_raw_fx = Comp_diff_lt_corr_fx( hCPE, 0, rms_L_fx, q_rms_L, rms_R_fx, q_rms_R, ener_L_dt_fx, ener_R_dt_fx, corr_LM_fx, q_corr_LM, corr_RM_fx, q_corr_RM, hStereoTD->tdm_lt_rms_L_fx, hStereoTD->tdm_lt_rms_R_fx, &hStereoTD->tdm_lt_corr_LM_fx, - &hStereoTD->tdm_lt_corr_RM_fx, &hStereoTD->tdm_last_diff_lt_corr_fx, &hStereoTD->q_tdm_last_diff_lt_corr, &inst_ratio_L_fx, &diff_lt_corr_fx, &q_d_lt_corr_raw ); -#else d_lt_corr_raw = Comp_diff_lt_corr( hCPE, 0, rms_L, rms_R, ener_L_dt, ener_R_dt, corr_LM, corr_RM, hStereoTD->tdm_lt_rms_L, hStereoTD->tdm_lt_rms_R, &hStereoTD->tdm_lt_corr_LM, &hStereoTD->tdm_lt_corr_RM, &hStereoTD->tdm_last_diff_lt_corr, &inst_ratio_L, &diff_lt_corr ); -#endif - -#ifdef IVAS_FLOAT_FIXED - hStereoTD->tdm_lt_corr_LM = fixedToFloat_32( hStereoTD->tdm_lt_corr_LM_fx, Q24 ); - hStereoTD->tdm_lt_corr_RM = fixedToFloat_32( hStereoTD->tdm_lt_corr_RM_fx, Q24 ); - hStereoTD->tdm_last_diff_lt_corr = fixedToFloat_32( 
hStereoTD->tdm_last_diff_lt_corr_fx, hStereoTD->q_tdm_last_diff_lt_corr ); - diff_lt_corr = fixedToFloat_32( diff_lt_corr_fx, Q24 ); - d_lt_corr_raw = fixedToFloat_32( d_lt_corr_raw_fx, q_d_lt_corr_raw ); - inst_ratio_L = fixedToFloat_32( inst_ratio_L_fx, Q24 ); -#endif /*----------------------------------------------------------------* * UNCLR classifier (detection of uncorrelated L and R channels) @@ -559,11 +1045,7 @@ int16_t stereo_tdm_ener_analysis( } } -#ifdef IVAS_FLOAT_FIXED - idx = stereo_smooth_LR_transition_fx( &hStereoTD->tdm_prev_stable_idx, &hStereoTD->tdm_ratio_transition_mov_flag, hStereoTD->tdm_last_ratio_idx, &hStereoTD->tdm_prev_desired_idx, &hStereoTD->tdm_ratio_transition_cnt, tdm_SM_flag_loc, desired_idx ); -#else idx = stereo_smooth_LR_transition( &hStereoTD->tdm_prev_stable_idx, &hStereoTD->tdm_ratio_transition_mov_flag, hStereoTD->tdm_last_ratio_idx, &hStereoTD->tdm_prev_desired_idx, &hStereoTD->tdm_ratio_transition_cnt, tdm_SM_flag_loc, desired_idx ); -#endif /* Change the switching level in case of dual mono (in case the scenario still accept left right switching */ /* This logic is needed in case the content is exactly the same in the 2 channel and it is expected to get back to LRTD, to prevent the secondary channel to be completely empty */ @@ -639,13 +1121,7 @@ int16_t stereo_tdm_ener_analysis( } else if ( hStereoTD->tdm_LRTD_flag == 1 ) { -#ifdef IVAS_FLOAT_FIXED - q_d_lt_corr_raw = L_get_q1( d_lt_corr_raw ); - d_lt_corr_raw_fx = floatToFixed( d_lt_corr_raw, q_d_lt_corr_raw ); - idx = limit_idx_NoDwnmix_fx( idx, side_can_change, d_lt_corr_raw_fx, q_d_lt_corr_raw ); -#else idx = limit_idx_NoDwnmix( idx, side_can_change, d_lt_corr_raw ); -#endif hStereoTD->tdm_prev_stable_idx = LRTD_STEREO_LEFT_IS_PRIM; tdm_LRTD_pri_side = 0; /* right channel */ if ( idx != LRTD_STEREO_RIGHT_IS_PRIM ) @@ -656,11 +1132,7 @@ int16_t stereo_tdm_ener_analysis( } else { -#ifdef IVAS_FLOAT_FIXED - idx = limit_idx_Dwnmix_fx( idx, ( 
hCPE->hStereoClassif->unclr_decision || ( sts[0]->flag_noisy_speech_snr == 1 && hCPE->hStereoClassif->xtalk_wscore > 0.1f ) ), desired_idx, hStereoTD->tdm_last_ratio_idx, hStereoTD->tdm_last_LRTD_PriCh_cnt, hStereoTD->tdm_last_LRTD_frame_cnt ); -#else idx = limit_idx_Dwnmix( idx, ( hCPE->hStereoClassif->unclr_decision || ( sts[0]->flag_noisy_speech_snr == 1 && hCPE->hStereoClassif->xtalk_wscore > 0.1f ) ), desired_idx, hStereoTD->tdm_last_ratio_idx, hStereoTD->tdm_last_LRTD_PriCh_cnt, hStereoTD->tdm_last_LRTD_frame_cnt ); -#endif } if ( abs( hStereoTD->tdm_last_ratio_idx - idx ) > LRTD_STEREO_MID_IS_PRIM ) @@ -704,12 +1176,6 @@ int16_t stereo_tdm_ener_analysis( hStereoTD->tdm_last_diff_lt_corr_SM = 0; hStereoTD->tdm_last_ener_lt_R_SM = 0; hStereoTD->tdm_last_ener_lt_L_SM = 0; -#ifdef IVAS_FLOAT_FIXED - hStereoTD->tdm_lt_corr_RM_SM_fx = 167772 /*0.01f Q24*/; - hStereoTD->tdm_lt_corr_LM_SM_fx = 167772 /*0.01f Q24*/; - hStereoTD->tdm_last_ener_lt_R_SM_fx = 0; - hStereoTD->tdm_last_ener_lt_L_SM_fx = 0; -#endif hStereoTD->tdm_noop_mov_flag = 0; hStereoTD->tdm_noop_cnt = 0; @@ -720,49 +1186,7 @@ int16_t stereo_tdm_ener_analysis( hStereoTD->tdm_hyst_cnt_SM = 0; } -#ifdef IVAS_FLOAT_FIXED - floatToFixed_arr16( sts[0]->input, sts[0]->input_fx, 0, input_frame ); - floatToFixed_arr16( sts[1]->input, sts[1]->input_fx, 0, input_frame ); - - hStereoTD->tdm_lt_rms_L_SM_fx = floatToFixed_32( hStereoTD->tdm_lt_rms_L_SM, Q16 ); - hStereoTD->tdm_lt_rms_R_SM_fx = floatToFixed_32( hStereoTD->tdm_lt_rms_R_SM, Q16 ); - hStereoTD->tdm_last_ener_lt_L_SM_fx = floatToFixed_32( hStereoTD->tdm_last_ener_lt_L_SM, Q16 ); - hStereoTD->tdm_last_ener_lt_R_SM_fx = floatToFixed_32( hStereoTD->tdm_last_ener_lt_R_SM, Q16 ); - hStereoTD->tdm_LT_es_em_SM_fx = floatToFixed_32( hStereoTD->tdm_LT_es_em_SM, Q21 ); - sts[0]->hNoiseEst->Etot_last_fx = float_to_fix16( sts[0]->hNoiseEst->Etot_last, Q8 ); - sts[1]->hNoiseEst->Etot_last_fx = float_to_fix16( sts[1]->hNoiseEst->Etot_last, Q8 ); - sts[0]->old_corr_fx 
= float_to_fix16( sts[0]->old_corr, Q15 ); - sts[1]->old_corr_fx = float_to_fix16( sts[1]->old_corr, Q15 ); - - hStereoTD->tdm_lt_corr_LM_SM_fx = floatToFixed_32( hStereoTD->tdm_lt_corr_LM_SM, Q24 ); - hStereoTD->tdm_lt_corr_RM_SM_fx = floatToFixed_32( hStereoTD->tdm_lt_corr_RM_SM, Q24 ); - hStereoTD->q_tdm_last_diff_lt_corr_SM = Q31; - hStereoTD->tdm_last_diff_lt_corr_SM_fx = floatToFixed_32( hStereoTD->tdm_last_diff_lt_corr_SM, hStereoTD->q_tdm_last_diff_lt_corr_SM ); - - hStereoTD->tdm_last_ratio_SM_fx = floatToFixed_32( hStereoTD->tdm_last_ratio_SM, Q31 ); -#endif - -#ifdef IVAS_FLOAT_FIXED - *tdm_ratio_idx_SM = stereo_tdm_ener_analysis_SM_fx( hCPE, sts, input_frame, &tdm_NOOP_flag ); -#else *tdm_ratio_idx_SM = stereo_tdm_ener_analysis_SM( hCPE, sts, input_frame, &tdm_NOOP_flag ); -#endif - -#ifdef IVAS_FLOAT_FIXED - hStereoTD->tdm_lt_rms_L_SM = fixedToFloat_32( hStereoTD->tdm_lt_rms_L_SM_fx, Q16 ); - hStereoTD->tdm_lt_rms_R_SM = fixedToFloat_32( hStereoTD->tdm_lt_rms_R_SM_fx, Q16 ); - hStereoTD->tdm_last_ener_lt_L_SM = fixedToFloat_32( hStereoTD->tdm_last_ener_lt_L_SM_fx, Q16 ); - hStereoTD->tdm_last_ener_lt_R_SM = fixedToFloat_32( hStereoTD->tdm_last_ener_lt_R_SM_fx, Q16 ); - hStereoTD->tdm_LT_es_em_SM = fixedToFloat_32( hStereoTD->tdm_LT_es_em_SM_fx, Q21 ); - sts[0]->hNoiseEst->Etot_last = fix16_to_float( sts[0]->hNoiseEst->Etot_last_fx, Q8 ); - sts[1]->hNoiseEst->Etot_last = fix16_to_float( sts[1]->hNoiseEst->Etot_last_fx, Q8 ); - hCPE->hStereoClassif->xtalk_fv[E_diff_corrLM_corrRM] = fixedToFloat( hCPE->hStereoClassif->xtalk_fv_fx[E_diff_corrLM_corrRM], Q21 ); - hCPE->hStereoClassif->xtalk_fv[E_tdm_LT_es_em] = fixedToFloat( hCPE->hStereoClassif->xtalk_fv_fx[E_tdm_LT_es_em], Q21 ); - - hStereoTD->tdm_lt_corr_LM_SM = fixedToFloat_32( hStereoTD->tdm_lt_corr_LM_SM_fx, Q24 ); - hStereoTD->tdm_lt_corr_RM_SM = fixedToFloat_32( hStereoTD->tdm_lt_corr_RM_SM_fx, Q24 ); - hStereoTD->tdm_last_diff_lt_corr_SM = fixedToFloat_32( hStereoTD->tdm_last_diff_lt_corr_SM_fx, 
hStereoTD->q_tdm_last_diff_lt_corr_SM ); -#endif } else { @@ -785,7 +1209,7 @@ int16_t stereo_tdm_ener_analysis( return ( idx ); } - +#endif #ifdef IVAS_FLOAT_FIXED /*-------------------------------------------------------------------* @@ -1046,7 +1470,7 @@ static Word16 Get_dt_lt_ener_fx( hCPE->hStereoClassif->xtalk_fv_fx[E_diff_corrLM_corrRM] = BASOP_Util_Add_Mant32Exp( *corr_LM, sub( Q31, *q_corr_LM ), L_negate( *corr_RM ), sub( Q31, *q_corr_RM ), &exp_diff ); move32(); - hCPE->hStereoClassif->xtalk_fv_fx[E_diff_corrLM_corrRM] = L_shl( hCPE->hStereoClassif->xtalk_fv_fx[E_diff_corrLM_corrRM], sub( Q21, sub( Q31, exp_diff ) ) ); + hCPE->hStereoClassif->xtalk_fv_fx[E_diff_corrLM_corrRM] = L_shl( hCPE->hStereoClassif->xtalk_fv_fx[E_diff_corrLM_corrRM], sub( Q15, sub( Q31, exp_diff ) ) ); // Q15 move32(); IF( sts[0]->hVAD->hangover_cnt != 0 ) @@ -1060,7 +1484,7 @@ static Word16 Get_dt_lt_ener_fx( move32(); } - hCPE->hStereoClassif->xtalk_fv_fx[E_tdm_LT_es_em] = *tdm_LT_es_em; // Q21 + hCPE->hStereoClassif->xtalk_fv_fx[E_tdm_LT_es_em] = L_shr( *tdm_LT_es_em, 6 ); // Q15 move32(); tmp_SM_flag = 0; diff --git a/lib_enc/ivas_stereo_td_enc.c b/lib_enc/ivas_stereo_td_enc.c index 22f387acd4349e81b04d666585d8a8fc859078be..f97989cfb75cde11bda5ea6ae9f00cab5fa90f6b 100644 --- a/lib_enc/ivas_stereo_td_enc.c +++ b/lib_enc/ivas_stereo_td_enc.c @@ -142,12 +142,12 @@ void stereo_td_init_enc_fx( const Word16 last_element_mode /* i : last element mode */ ) { - hStereoTD->tdm_lt_corr_RM_fx = 21474836; // Q31 - hStereoTD->tdm_lt_corr_LM_fx = 21474836; + hStereoTD->tdm_lt_corr_RM_fx = 167772; // Q24 + hStereoTD->tdm_lt_corr_LM_fx = 167772; // Q24 hStereoTD->tdm_last_ratio_fx = 1073741824; // Q31 hStereoTD->tdm_last_ratio_idx = LRTD_STEREO_MID_IS_PRIM; - hStereoTD->tdm_lt_rms_L_fx = 671088640; // Q24 - hStereoTD->tdm_lt_rms_R_fx = 671088640; // Q24 + hStereoTD->tdm_lt_rms_L_fx = 2621440; // Q16 + hStereoTD->tdm_lt_rms_R_fx = 2621440; // Q16 hStereoTD->tdm_last_diff_lt_corr_fx = 0; 
hStereoTD->q_tdm_last_diff_lt_corr = Q31; hStereoTD->tdm_last_ener_lt_R_fx = 0; @@ -163,7 +163,7 @@ void stereo_td_init_enc_fx( hStereoTD->tdm_prev_stable_idx = LRTD_STEREO_MID_IS_PRIM; hStereoTD->tdm_prev_desired_idx = LRTD_STEREO_MID_IS_PRIM; hStereoTD->tdm_FD2LRTD_SW_cnt = 0; - hStereoTD->tdm_LT_es_em_fx = 214748364; // Q31 + hStereoTD->tdm_LT_es_em_fx = 209715; // Q21 hStereoTD->tdm_hyst_cnt = 0; /* NOOP parameters */ hStereoTD->tdm_lt_corr_RM_SM_fx = 21474836; // Q31 diff --git a/lib_enc/lib_enc.c b/lib_enc/lib_enc.c index 44f07700bef5511512e7433240c8384ef31f1d6e..a7e986843dc5a311fb48443992b503cee88174b8 100644 --- a/lib_enc/lib_enc.c +++ b/lib_enc/lib_enc.c @@ -1568,7 +1568,7 @@ ivas_error IVAS_ENC_EncodeFrameToSerial( st_ivas->codec_mode = MODE1; move16(); test(); - IF( NE_16( hEncoderConfig->element_mode_init, EVS_MONO ) || st_ivas->hEncoderConfig->stereo_dmx_evs ) + IF( NE_16( hEncoderConfig->element_mode_init, EVS_MONO ) ) { reset_rf_indices( hCoreCoder->hRF, hCoreCoder->L_frame, &( hCoreCoder->rf_target_bits_write ) ); } @@ -1593,7 +1593,7 @@ ivas_error IVAS_ENC_EncodeFrameToSerial( if ( hEncoderConfig->Opt_RF_ON == 0 && EQ_16( hEncoderConfig->ivas_format, MONO_FORMAT ) ) { test(); - IF( NE_16( hEncoderConfig->element_mode_init, EVS_MONO ) || st_ivas->hEncoderConfig->stereo_dmx_evs ) + IF( NE_16( hEncoderConfig->element_mode_init, EVS_MONO ) ) { reset_rf_indices( hCoreCoder->hRF, hCoreCoder->L_frame, &( hCoreCoder->rf_target_bits_write ) ); } @@ -1706,7 +1706,7 @@ ivas_error IVAS_ENC_EncodeFrameToSerial( IF( hIvasEnc->switchingActive && EQ_16( hEncoderConfig->ivas_format, MONO_FORMAT ) ) { test(); - IF( NE_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) || st_ivas->hEncoderConfig->stereo_dmx_evs ) + IF( NE_16( st_ivas->hEncoderConfig->element_mode_init, EVS_MONO ) ) { copy_encoder_config( st_ivas, hCoreCoder, 0 ); } @@ -1731,7 +1731,7 @@ ivas_error IVAS_ENC_EncodeFrameToSerial( IF( hEncoderConfig->Opt_AMR_WB ) { - IF( NE_16( 
hEncoderConfig->element_mode_init, EVS_MONO ) || st_ivas->hEncoderConfig->stereo_dmx_evs ) + IF( NE_16( hEncoderConfig->element_mode_init, EVS_MONO ) ) { amr_wb_enc( hCoreCoder, inputBuffer, st_ivas->mem_hp20_in[0], inputBufferSize ); } @@ -1743,6 +1743,7 @@ ivas_error IVAS_ENC_EncodeFrameToSerial( ELSE { test(); +#ifndef IVAS_FLOAT_FIXED IF( NE_16( hEncoderConfig->element_mode_init, EVS_MONO ) || st_ivas->hEncoderConfig->stereo_dmx_evs ) { IF( NE_32( ( error = evs_enc( hCoreCoder, inputBuffer, st_ivas->mem_hp20_in[0], inputBufferSize ) ), IVAS_ERR_OK ) ) @@ -1751,6 +1752,7 @@ ivas_error IVAS_ENC_EncodeFrameToSerial( } } ELSE +#else { hCoreCoder->input_frame_fx = inputBufferSize; move32(); @@ -1759,6 +1761,7 @@ ivas_error IVAS_ENC_EncodeFrameToSerial( return error; } } +#endif } } else /* IVAS */ @@ -1771,7 +1774,7 @@ ivas_error IVAS_ENC_EncodeFrameToSerial( /* write indices into bitstream buffer */ test(); - IF( EQ_16( hEncoderConfig->element_mode_init, EVS_MONO ) && !st_ivas->hEncoderConfig->stereo_dmx_evs ) + IF( EQ_16( hEncoderConfig->element_mode_init, EVS_MONO ) ) { test(); IF( EQ_16( hEncoderConfig->ivas_format, MONO_FORMAT ) && EQ_16( hCoreCoder->element_mode, EVS_MONO ) ) diff --git a/lib_enc/lp_exc_e_fx.c b/lib_enc/lp_exc_e_fx.c index 9562eabf87f50d1b8a651bfcb05fa8e6aecb7974..d45f959731fb4dda19f657afce18dffbd3d4e6db 100644 --- a/lib_enc/lp_exc_e_fx.c +++ b/lib_enc/lp_exc_e_fx.c @@ -171,6 +171,158 @@ Word16 lp_filt_exc_enc_fx( return select; } +Word16 lp_filt_exc_enc_ivas_fx( + const Word16 codec_mode, /* i : MODE1 or MODE2 Q0 */ + const Word16 coder_type, /* i : coding type Q0 */ + const Word16 i_subfr, /* i : subframe index Q0 */ + Word16 *exc, /* i/o: pointer to excitation signal frame Q_new */ + const Word16 *h1, /* i : weighted filter input response Q(14+shift) */ + const Word16 *xn, /* i : target vector Q_new-1+shift */ + Word16 *y1, /* o : zero-memory filtered adaptive excitation Q_new-1+shift */ + Word16 *xn2, /* o : target vector for innovation 
search Q_new-1+shift */ + const Word16 L_subfr, /* i : length of vectors for gain quantization Q0 */ + const Word16 L_frame, /* i : frame size Q0 */ + Word16 *g_corr, /* o : ACELP correlation values mant/exp */ + const Word16 clip_gain, /* i : adaptive gain clipping flag Q0 */ + Word16 *gain_pit, /* o : adaptive excitation gain Q14 */ + Word16 *lp_flag /* i/o: mode selection Q0 */ +) +{ + Word16 gain1, gain2, g_corr2[4], exc_tmp[5 * L_SUBFR], xn2_tmp[5 * L_SUBFR]; + Word16 y1_tmp[5 * L_SUBFR]; + Word16 select, i, exp_ener, exp_ener1; + Word16 wtmp, wtmp1; + Word32 Ltmp; + + Word16 use_prev_sf_pit_gain = 0; + + gain1 = 0; + move16(); + gain2 = 0; + move16(); + + /*----------------------------------------------------------------* + * Find the target energy if the adaptive exc. is not filtered + *----------------------------------------------------------------*/ + test(); + IF( EQ_16( codec_mode, MODE2 ) && EQ_16( coder_type, 100 ) ) + { + use_prev_sf_pit_gain = 1; + } + exp_ener = 0; + move16(); + wtmp = 0; + move16(); + test(); + IF( EQ_16( *lp_flag, FULL_BAND ) || EQ_16( *lp_flag, NORMAL_OPERATION ) ) + { + IF( use_prev_sf_pit_gain == 1 ) + { + wtmp = adpt_enr_fx( codec_mode, &exc[i_subfr], h1, y1, L_subfr, gain_pit, g_corr, clip_gain, xn, xn2, &exp_ener, use_prev_sf_pit_gain ); + } + else + { + wtmp = adpt_enr_fx( codec_mode, &exc[i_subfr], h1, y1, L_subfr, &gain1, g_corr, clip_gain, xn, xn2, &exp_ener, use_prev_sf_pit_gain ); + } + } + + /*----------------------------------------------------------------* + * Filter the adaptive excitation + * Find the target energy if the adapt. exc. 
is filtered + *----------------------------------------------------------------*/ + + exp_ener1 = 0; + move16(); + wtmp1 = 0; + move16(); + test(); + IF( ( EQ_16( *lp_flag, LOW_PASS ) ) || ( EQ_16( *lp_flag, NORMAL_OPERATION ) ) ) + { + test(); + IF( EQ_16( codec_mode, MODE2 ) && EQ_16( L_frame, L_FRAME16k ) ) + { + FOR( i = 0; i < L_subfr; i++ ) + { + Ltmp = L_mult( 6881, exc[i - 1 + i_subfr] ); /* constants in Q15 */ + Ltmp = L_mac( Ltmp, 19005, exc[i + i_subfr] ); + Ltmp = L_mac( Ltmp, 6881, exc[i + 1 + i_subfr] ); + exc_tmp[i] = round_fx( Ltmp ); + } + } + ELSE + { + FOR( i = 0; i < L_subfr; i++ ) + { + Ltmp = L_mult( 5898, exc[i - 1 + i_subfr] ); /* constants in Q15 */ + Ltmp = L_mac( Ltmp, 20972, exc[i + i_subfr] ); + Ltmp = L_mac( Ltmp, 5898, exc[i + 1 + i_subfr] ); + exc_tmp[i] = round_fx( Ltmp ); + } + } + IF( use_prev_sf_pit_gain == 1 ) + { + wtmp1 = adpt_enr_fx( codec_mode, exc_tmp, h1, y1_tmp, L_subfr, &gain2, gain_pit, clip_gain, xn, xn2_tmp, &exp_ener1, use_prev_sf_pit_gain ); + } + ELSE + { + wtmp1 = adpt_enr_fx( codec_mode, exc_tmp, h1, y1_tmp, L_subfr, &gain2, g_corr2, clip_gain, xn, xn2_tmp, &exp_ener1, use_prev_sf_pit_gain ); + } + } + + if ( LT_16( exp_ener, exp_ener1 ) ) + { + wtmp = shr( wtmp, 1 ); + } + + if ( GT_16( exp_ener, exp_ener1 ) ) + { + wtmp1 = shr( wtmp1, 1 ); + } + + /*-----------------------------------------------------------------* + * use the best prediction (minimize quadratic error) + *-----------------------------------------------------------------*/ + + test(); + test(); + IF( ( ( LT_16( wtmp1, wtmp ) ) && ( EQ_16( *lp_flag, NORMAL_OPERATION ) ) ) || ( EQ_16( *lp_flag, LOW_PASS ) ) ) + { + /* use the LP filter for pitch excitation prediction */ + select = LOW_PASS; + move16(); + Copy( exc_tmp, &exc[i_subfr], L_subfr ); + Copy( y1_tmp, y1, L_subfr ); + Copy( xn2_tmp, xn2, L_subfr ); + + IF( use_prev_sf_pit_gain == 0 ) + { + *gain_pit = gain2; + move16(); + g_corr[0] = g_corr2[0]; + move16(); + g_corr[1] = g_corr2[1]; + 
move16(); + g_corr[2] = g_corr2[2]; + move16(); + g_corr[3] = g_corr2[3]; + move16(); + } + } + ELSE + { + /* no LP filter used for pitch excitation prediction */ + select = FULL_BAND; + move16(); + IF( use_prev_sf_pit_gain == 0 ) + { + *gain_pit = gain1; + move16(); + } + } + + return select; +} + /*-------------------------------------------------------------------* * adpt_enr_fx() * diff --git a/lib_enc/multi_harm_fx.c b/lib_enc/multi_harm_fx.c index 2611fec4b542579be041c8e16f7afe2a64afe93d..2277ad07e98be7f69bdc14b5af61ad4dc526f259 100644 --- a/lib_enc/multi_harm_fx.c +++ b/lib_enc/multi_harm_fx.c @@ -375,3 +375,362 @@ Word16 multi_harm_fx( /* o : frame multi-harmonicity } return harm; } +#ifdef IVAS_FLOAT_FIXED +Word16 multi_harm_ivas_fx( /* o : frame multi-harmonicity (1-harmonic, 0-not) */ + const Word16 Bin_E[], /* i : log-energy spectrum of the current frame Q7 */ + Word16 old_S[], /* i/o: prev. log-energy spectrum w. subtracted floor Q7 */ + Word16 cor_map_LT[], /* i/o: LT correlation map Q15 */ + Word16 *multi_harm_limit, /* i/o: multi harminic threshold Q9 */ + const Word32 total_brate, /* i : total bitrate Q0 */ + const Word16 bwidth, /* i : input signal bandwidth Q0 */ + Word16 *cor_strong_limit, /* i/o: HF correlation indicator Q0 */ + Word16 *st_mean_avr_dyn, /* i/o: long term average dynamic Q7 */ + Word16 *st_last_sw_dyn, /* i/o: last dynamic Q7 */ + Word16 *cor_map_sum, /* i : sum of correlation map Q8 */ + Word16 *sp_floor, /* o: noise floor estimate Q7 */ + Word16 S_map[] /* o : short-term correlation map Q7 */ +) +{ + Word16 i, j, k, L, stemp, N_mins, ind_mins[L_FFT / 4], *pt_mins, harm; + Word16 S[L_FFT / 2], flor, step, sign_fx, tmp16, tmp2, ExpInd, tmpdB, ExpdB, Expx2, Expy2; + Word16 corx2, cory2, corxy, cor, cor_map[L_FFT / 2], *pt1, *pt2, cor_strong; + Word32 L_acc; + Word32 Lcorx2, Lcory2, Lcorxy, Lcor_map_LT_sum; + Word16 mean_dyn; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; + move32(); +#endif + + 
/*------------------------------------------------------------------* + * initialization + *------------------------------------------------------------------*/ + + /* length of the useful part of the spectrum (up to 6.4kHz) */ + L = L_FFT / 2; + move16(); + if ( EQ_16( bwidth, NB ) ) + { + /* length of the useful part of the spectrum (up to 3.6kHz) */ + L = 76; + move16(); + } + + Copy( Bin_E, S, L ); /* NOTE(review): S is zeroed by set16_fx( S, 0, L ) further down before it is ever read, so this copy looks redundant - confirm */ + + /*------------------------------------------------------------------* + * searching of spectral maxima and minima + *------------------------------------------------------------------*/ + + pt_mins = ind_mins; + + /* index of the first minimum */ + if ( LT_16( Bin_E[0], Bin_E[1] ) ) + { + *pt_mins++ = 0; + move16(); + } + + FOR( i = 1; i < L - 1; i++ ) + { + /* minimum found */ + test(); + if ( LT_16( Bin_E[i], Bin_E[i - 1] ) && LT_16( Bin_E[i], Bin_E[i + 1] ) ) + { + *pt_mins++ = i; + move16(); + } + } + + /* index of the last minimum */ + IF( LT_16( Bin_E[L - 1], Bin_E[L - 2] ) ) + { + *pt_mins++ = sub( L, 1 ); + move16(); + } + + /* total number of minimas found */ + N_mins = (Word16) ( pt_mins - ind_mins - 1 ); /* number of stored minima minus one, i.e. the index of the last entry written to ind_mins[] */ + move16(); + + /*------------------------------------------------------------------* + * calculation of the spectral floor + * subtraction of the spectral floor + *------------------------------------------------------------------*/ + + set16_fx( S, 0, L ); + + IF( N_mins > 0 ) + { + L_acc = L_deposit_l( 0 ); + FOR( i = 0; i < N_mins; ++i ) + { + L_acc = L_mac0( L_acc, Bin_E[ind_mins[i]], 1 ); + } + *sp_floor = extract_l( Mult_32_16( L_acc, div_s( 1, N_mins ) ) ); /* L_acc sums Bin_E at ind_mins[0..N_mins-1]; the last minimum ind_mins[N_mins] is not part of the sum */ + move16(); + + set16_fx( S, 0, ind_mins[0] ); + set16_fx( &S[ind_mins[N_mins]], 0, sub( shr( L_FFT, 1 ), ind_mins[N_mins] ) ); + + pt_mins = ind_mins; + + flor = 0; + move16(); + step = 0; + move16(); + + FOR( i = ind_mins[0]; i < ind_mins[N_mins]; i++ ) + { + /* we are at the end of the next minimum */ + IF( EQ_16( i, *pt_mins ) ) + { + pt_mins++; + flor = Bin_E[i]; + move16(); /*Q7*/ + /* 
calculate the new step */ + /*step = (Bin_E[*pt_mins] - Bin_E[i]) / (*pt_mins-i);*/ + tmp16 = sub( *pt_mins, i ); + tmpdB = sub( Bin_E[*pt_mins], Bin_E[i] ); + sign_fx = shr( tmpdB, 15 ); /* 0 if positive else -1 */ + ExpdB = sub( norm_s( tmpdB ), 1 ); + tmpdB = abs_s( shl( tmpdB, ExpdB ) ); + ExpInd = norm_s( tmp16 ); + tmp16 = shl( tmp16, ExpInd ); + tmp16 = div_s( tmpdB, tmp16 ); + tmp16 = sub( s_xor( tmp16, sign_fx ), sign_fx ); /* puts the sign back on the quotient: xor with -1 followed by +1 negates it, sign_fx == 0 leaves it unchanged */ + step = shr( tmp16, add( sub( ExpdB, ExpInd ), 15 ) ); /* Q7 */ + } + + /* subtract the floor */ + S[i] = s_max( sub_sat( Bin_E[i], flor ), 0 ); + move16(); + + /* update the floor */ + flor = add( flor, step ); /*Q7*/ + } + } + + /* Calculate the maximum dynamic per band */ + /* since we are processing 40 bins we will use 1/40 in Q15 to find the mean */ + /* mean_dyn = mean(&S[L-40], 40);*/ + L_acc = L_deposit_l( 0 ); + FOR( i = L - 40; i < L; i++ ) + { + L_acc = L_mac( L_acc, S[i], 819 /*1 / 40 * (2 ^ 15)*/ ); + } + mean_dyn = round_fx( L_acc ); /*Q7*/ + + /*mean_dyn = 0.6f * *st_mean_avr_dyn + 0.4f * mean_dyn;*/ + L_acc = L_mult( 13107 /*0.4f*/, mean_dyn ); /*Q23*/ + L_acc = L_mac( L_acc, 19661 /*0.6f*/, *st_mean_avr_dyn ); /*Q23*/ + mean_dyn = round_fx( L_acc ); /*Q7*/ + + test(); + IF( LT_16( mean_dyn, 1229 ) /*9.6f*/ && *cor_strong_limit != 0 ) + { + *cor_strong_limit = 0; + move16(); + *st_last_sw_dyn = mean_dyn; + move16(); + } + ELSE IF( GT_16( sub( mean_dyn, *st_last_sw_dyn ), 576 ) /*4.5f*/ ) + { + *cor_strong_limit = 1; + move16(); + } + test(); + if ( LT_32( total_brate, ACELP_9k60 ) || GT_32( total_brate, ACELP_16k40 ) ) + { + *cor_strong_limit = 1; /* for bitrates outside ACELP_9k60..ACELP_16k40 the indicator is always forced to 1, overriding the decision above */ + move16(); + } + + *st_mean_avr_dyn = mean_dyn; + move16(); + + /*------------------------------------------------------------------* + * calculation of the correlation map + *------------------------------------------------------------------*/ + + set16_fx( cor_map, 0, L ); + IF( N_mins > 0 ) + { + Lcorx2 = L_deposit_l( 0 ); + Lcorxy = L_deposit_l( 0 ); + stemp = 
ind_mins[0]; + move16(); + Lcory2 = L_mult( old_S[stemp], old_S[stemp] ); + k = 1; /* k indexes the minimum that closes the peak currently being accumulated (ind_mins[k - 1] .. ind_mins[k]) */ + move16(); + + FOR( i = add( stemp, 1 ); i <= ind_mins[N_mins]; i++ ) + { + IF( EQ_16( i, ind_mins[k] ) ) + { + /* include the last peak point (new minimum) to the corr. sum */ +#ifdef BASOP_NOGLOB + Lcory2 = L_mac_o( Lcory2, old_S[i], old_S[i], &Overflow ); +#else /* BASOP_NOGLOB */ + Lcory2 = L_mac( Lcory2, old_S[i], old_S[i] ); +#endif /* BASOP_NOGLOB */ + + /* calculation of the norm. peak correlation */ + test(); + IF( Lcorx2 != 0 && Lcory2 != 0 ) + { + /* corxy * corxy*/ + tmp16 = sub( norm_l( Lcorxy ), 1 ); + corxy = extract_h( L_shl( Lcorxy, tmp16 ) ); + corxy = mult_r( corxy, corxy ); + /* (corx2 * cory2) */ + Expx2 = norm_l( Lcorx2 ); + Expy2 = norm_l( Lcory2 ); + corx2 = extract_h( L_shl( Lcorx2, Expx2 ) ); + cory2 = extract_h( L_shl( Lcory2, Expy2 ) ); + corx2 = mult_r( corx2, cory2 ); + Expx2 = add( Expy2, Expx2 ); + /* Validate num < den */ + cor = sub( corx2, corxy ); + cor = shr( cor, 15 ); + /* Add 1 to tmp16 & shr by 2 if corxy > corx2 */ + tmp16 = sub( tmp16, cor ); + corxy = shl( corxy, cor ); + corxy = shl( corxy, cor ); + /* cor = corxy * corxy / (corx2 * cory2) */ + corxy = div_s( corxy, corx2 ); +#ifdef BASOP_NOGLOB + cor = shr_o( corxy, sub( shl( tmp16, 1 ), Expx2 ), &Overflow ); /* Q15 */ +#else /* BASOP_NOGLOB */ + cor = shr( corxy, sub( shl( tmp16, 1 ), Expx2 ) ); /* Q15 */ +#endif /* BASOP_NOGLOB */ + } + ELSE + { + cor = 0; + move16(); + } + + /* save the norm. 
peak correlation in the correlation map */ + FOR( j = ind_mins[k - 1]; j < ind_mins[k]; j++ ) + { + old_S[j] = S[j]; + move16(); + S[j] = shr( cor, 8 ); /* from here on S[j] no longer holds the floor-subtracted spectrum but the peak correlation scaled down by 8 bits */ + move16(); + cor_map[j] = cor; + move16(); + } + + Lcorx2 = L_deposit_l( 0 ); + Lcory2 = L_deposit_l( 0 ); + Lcorxy = L_deposit_l( 0 ); + + k = add( k, 1 ); + } +#ifdef BASOP_NOGLOB + Lcorx2 = L_mac_o( Lcorx2, S[i], S[i], &Overflow ); + Lcory2 = L_mac_o( Lcory2, old_S[i], old_S[i], &Overflow ); + Lcorxy = L_mac_o( Lcorxy, S[i], old_S[i], &Overflow ); +#else /* BASOP_NOGLOB */ + Lcorx2 = L_mac( Lcorx2, S[i], S[i] ); + Lcory2 = L_mac( Lcory2, old_S[i], old_S[i] ); + Lcorxy = L_mac( Lcorxy, S[i], old_S[i] ); +#endif + } + + Copy( S, old_S, ind_mins[0] ); + Copy( &S[ind_mins[N_mins]], &old_S[ind_mins[N_mins]], sub( L, ind_mins[N_mins] ) ); + } + ELSE + { + *sp_floor = Bin_E[0]; + move16(); + } + *sp_floor = mult( *sp_floor, 14231 /*1.0f / logf( 10.0f ) Q15*/ ); + move16(); /* Convert to log10() */ + + /*------------------------------------------------------------------* + * updating of the long-term correlation map + * summation of the long-term correlation map + *------------------------------------------------------------------*/ + + Lcor_map_LT_sum = L_deposit_l( 0 ); + tmp2 = 0; + move16(); + + cor_strong = 0; + move16(); + pt1 = cor_map_LT; + move16(); + pt2 = cor_map; + move16(); + FOR( i = 0; i < L; i++ ) + { + /* tmp2 += S[i]; */ + tmp2 = add( tmp2, shl( S[i], 1 ) ); /* tmp2 in Q8; max value is 128) */ + + /* *pt1 = M_ALPHA_FX * *pt1 + (1-M_ALPHA_FX) * *pt2++ */ + *pt1 = mac_r( L_mult( ONE_MINUS_M_ALPHA, *pt2 ), M_ALPHA_FX, *pt1 ); + move16(); + + /* cor_map_LT_sum += *pt1 */ + Lcor_map_LT_sum = L_add( Lcor_map_LT_sum, *pt1 ); /* cor_map_LT_sum in Q15; max value is 128) */ + + if ( GT_16( *pt1, 31130 ) /*0.95f*/ ) + { + cor_strong = 1; + move16(); + } + + pt1++; + pt2++; + } + + IF( EQ_16( bwidth, NB ) ) + { + /* cor_map_LT_sum *= 1.53f; */ + /* tmp2 *= 1.53f; */ + Lcor_map_LT_sum = L_shl( Mult_32_16( 
Lcor_map_LT_sum, 25068 /*1.53f Q14*/ ), 1 ); + tmp2 = round_fx( L_mac( L_mult( tmp2, 32767 ), tmp2, 17367 /*0.53 Q15*/ ) ); + } + *cor_map_sum = tmp2; + move16(); + + /* final decision about multi-harmonicity */ + harm = 0; + move16(); + test(); + if ( ( L_msu0( Lcor_map_LT_sum, *multi_harm_limit, 64 ) > 0 ) || ( cor_strong != 0 ) ) + { + harm = 1; + move16(); + } + + /*------------------------------------------------------------------* + * updating of the decision threshold + *------------------------------------------------------------------*/ + + stemp = add( *multi_harm_limit, THR_CORR_STEP_FX ); /* default: raise the threshold by one step; it is lowered by one step instead when the sum exceeds THR_CORR_FX, then clamped to [THR_CORR_MIN_FX, THR_CORR_MAX_FX] */ + if ( GT_32( Lcor_map_LT_sum, THR_CORR_FX ) ) /* Q15 */ + { + /* *multi_harm_limit -= THR_CORR_STEP_FX */ + stemp = sub( *multi_harm_limit, THR_CORR_STEP_FX ); + } + + stemp = s_min( stemp, THR_CORR_MAX_FX ); + *multi_harm_limit = s_max( stemp, THR_CORR_MIN_FX ); + move16(); + + IF( N_mins <= 0 ) + { + set16_fx( old_S, 0, L ); + } + IF( S_map != NULL ) /* S_map is optional: the map is exported only when a buffer is supplied */ + { + Copy( S, S_map, L ); + } + return harm; +} + +#endif diff --git a/lib_enc/nois_est_fx.c b/lib_enc/nois_est_fx.c index 8b4c2b99af7f16154b4dc92b8cdb2b33aae990a3..045b6687951202d5a156d909cefa9ff11f0ccc61 100644 --- a/lib_enc/nois_est_fx.c +++ b/lib_enc/nois_est_fx.c @@ -2025,6 +2025,7 @@ void noise_est_ivas_fx( const Word32 enr[], /* i : averaged energy over both subframes Q_new + Q_SCALE */ Word32 fr_bands[], /* i : spectrum per critical bands of the current frame Q_new + Q_SCALE */ Word16 *cor_map_sum, /* o : Q8 */ + Word16 *ncharX, /* o : Q11 */ Word16 *sp_div, /* o : Q_sp_div */ Word16 *Q_sp_div, /* o : Q factor for sp_div */ Word16 *non_staX, /* o : non-stationarity for sp/mus classifier */ @@ -2338,6 +2339,30 @@ void noise_est_ivas_fx( #endif /* BASOP_NOGLOB */ } + if ( ncharX != NULL ) + { + *ncharX = noise_chartmp; /* Q11 */ + move16(); + } + + IF( hStereoClassif != NULL ) + { + IF( st_fx->idchan == 0 ) + { + hStereoClassif->nchar_ch1_fx = noise_chartmp; /* Q11 */ + move32(); + hStereoClassif->nchar_ch1_e = 
31 - Q11; + move16(); + } + ELSE + { + hStereoClassif->nchar_ch2_fx = noise_chartmp; /* Q11 */ + move32(); + hStereoClassif->nchar_ch2_e = 31 - Q11; + move16(); + } + } + noise_chartmp = s_min( noise_chartmp, (Word16) 10 << 11 ); /* Q11 */ /* update LT value of the final parameter */ diff --git a/lib_enc/pit_enc_fx.c b/lib_enc/pit_enc_fx.c index 93edebeea35b4c9795ce1d32ad18574d97310926..b49e52fad2cb086226cf9b610aece67c1389e60d 100644 --- a/lib_enc/pit_enc_fx.c +++ b/lib_enc/pit_enc_fx.c @@ -13,6 +13,7 @@ #include "prot.h" /* Function prototypes */ #include "prot_fx.h" /* Function prototypes */ #include "prot_fx_enc.h" /* Function prototypes */ +#include "prot_fx_enc.h" /* Function prototypes */ #define inv_T0_res InvIntTable diff --git a/lib_enc/pitch_ol2.c b/lib_enc/pitch_ol2.c index 1b55d5a7665f07155749e9b7c9a54df3d7bfc3dc..6c5ed0123b9375a0bcd37d68df9ad7fef9fe7819 100644 --- a/lib_enc/pitch_ol2.c +++ b/lib_enc/pitch_ol2.c @@ -41,6 +41,11 @@ #include "prot.h" #include "wmc_auto.h" +#ifdef IVAS_FLOAT_FIXED +#include "prot_fx.h" /* Function prototypes */ +#include "prot_fx_enc.h" /* Function prototypes */ +#endif // IVAS_FLOAT_FIXED + /*-------------------------------------------------------------------* * Local constants *-------------------------------------------------------------------*/ @@ -164,7 +169,227 @@ void pitch_ol2( * * Very short stable pitch detection *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +void StableHighPitchDetect_ivas_fx( /* sets *flag_spitch to 0 or 1; when it is set to 1, pitch[0..2] are overwritten with the detected short lag */ + Word16 *flag_spitch, /* o : flag to indicate very short stable pitch */ + Word16 pitch[], /* i/o: OL pitch buffer */ + const Word16 voicing[], /* i : OL pitch gains */ + const Word16 wsp[], /* i : weighted speech */ + const Word16 localVAD, /* i : local VAD flag */ + Word16 *voicing_sm, /* i/o: smoothed open-loop pitch gains */ + Word16 *voicing0_sm, /* i/o: smoothed high pitch gains */ + Word16 *LF_EnergyRatio_sm, /* i/o: smoothed [0, 300Hz] relative peak energy*/ + 
Word16 *predecision_flag, /* i/o: predecision flag */ + Word32 *diff_sm, /* i/o: smoothed pitch frequency difference */ + Word32 *energy_sm, /* i/o: smoothed energy around pitch frequency */ + Word16 Q_new, + Word16 EspecdB[] ) +{ + Word16 i, pitch_freq_point; + Word16 T, Tp, pit_min; + Word16 energy0_16, energy1_16, ratio, voicing_m; + Word32 energy0, energy1, cor_max, diff, sum_energy; + const Word16 *pt_wsp; + Word16 tmp, tmp1, exp, diff16, cor_max16, exp1, exp2, pit_min_up; + Word32 L_tmp, L_tmp1; + Word16 Top; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; + move32(); +#endif + + /*voicing = (voicing[0] + voicing[1] + voicing[2] )/3;*/ + L_tmp = L_mult( voicing[0], 10923 ); + L_tmp = L_mac( L_tmp, voicing[1], 10923 ); + L_tmp = L_mac( L_tmp, voicing[2], 10923 ); +#ifdef BASOP_NOGLOB + voicing_m = round_fx_sat( L_tmp ); +#else + voicing_m = round_fx( L_tmp ); +#endif + /**voicing_sm = 0.75f*(*voicing_sm) + 0.25f*voicing;*/ + *voicing_sm = round_fx( L_mac( L_mult( *voicing_sm, 24576 ), voicing_m, 8192 ) ); + move16(); + + /* pitch_freq_point = (short)(L_FFT/(mult_fact*T_op[1])+0.5f);*/ + Top = pitch[1]; + move16(); + pitch_freq_point = idiv1616( L_FFT, Top ); /* Q0*/ /* NOTE(review): the mult_fact and +0.5f terms of the reference formula above do not appear in this call - confirm against idiv1616() */ + diff = L_deposit_l( 0 ); + sum_energy = L_deposit_l( 0 ); + FOR( i = 1; i < 2 * pitch_freq_point; i++ ) + { /* over bins 1 .. 2*pitch_freq_point-1: diff accumulates (level at the pitch bin - level at bin i), sum_energy accumulates the bin levels */ + diff = L_add( diff, sub( EspecdB[pitch_freq_point], EspecdB[i] ) ); + sum_energy = L_add( sum_energy, EspecdB[i] ); + } + /*sum_energy /= (2*pitch_freq_point-1);*/ + tmp = sub( shl( pitch_freq_point, 1 ), 1 ); + exp = norm_s( tmp ); + tmp1 = div_s( shl( 1, sub( 14, exp ) ), tmp ); /*Q(29-exp)*/ + L_tmp = Mult_32_16( sum_energy, tmp1 ); + sum_energy = L_shl( L_tmp, sub( exp, 14 ) ); + /**diff_sm = 0.2f * diff + 0.8f * *diff_sm;*/ + *diff_sm = L_add( Mult_32_16( diff, 6554 ), Mult_32_16( *diff_sm, 26214 ) ); + move32(); + /**energy_sm = 0.2f * sum_energy + 0.8f * *energy_sm;*/ + *energy_sm = L_add( Mult_32_16( sum_energy, 6554 ), Mult_32_16( *energy_sm, 26214 ) ); + move32(); + 
/*diff /= sum_energy;*/ + + IF( sum_energy ) + { + exp = norm_l( sum_energy ); + tmp = extract_h( L_shl( sum_energy, exp ) ); + exp = sub( sub( 30, exp ), 7 ); + IF( tmp < 0 ) + { /* negative sum_energy: divide by its magnitude, then negate the quotient */ + tmp = abs_s( tmp ); + tmp = div_s( 16384, tmp ); /*Q(15+exp)*/ + BASOP_SATURATE_WARNING_OFF_EVS +#ifdef BASOP_NOGLOB + diff = L_negate( L_shr_o( Mult_32_16( diff, tmp ), sub( exp + 7, 31 ), &Overflow ) ); +#else /* BASOP_NOGLOB */ + diff = L_negate( L_shr( Mult_32_16( diff, tmp ), sub( exp + 7, 31 ) ) ); +#endif /* BASOP_NOGLOB */ + BASOP_SATURATE_WARNING_ON_EVS +#ifdef BASOP_NOGLOB + diff16 = round_fx_o( diff, &Overflow ); +#else /* BASOP_NOGLOB */ + diff16 = round_fx( diff ); +#endif /* BASOP_NOGLOB */ + } + ELSE + { + tmp = div_s( 16384, tmp ); /*Q(15+exp)*/ + BASOP_SATURATE_WARNING_OFF_EVS +#ifdef BASOP_NOGLOB + diff = L_shr_o( Mult_32_16( diff, tmp ), sub( exp + 7, 31 ), &Overflow ); +#else /* BASOP_NOGLOB */ + diff = L_shr( Mult_32_16( diff, tmp ), sub( exp + 7, 31 ) ); +#endif /* BASOP_NOGLOB */ + BASOP_SATURATE_WARNING_ON_EVS +#ifdef BASOP_NOGLOB + diff16 = round_fx_o( diff, &Overflow ); +#else /* BASOP_NOGLOB */ + diff16 = round_fx( diff ); +#endif /* BASOP_NOGLOB */ + } + } + ELSE + { /* sum_energy == 0: the division is skipped and diff is only shifted and rounded */ +#ifdef BASOP_NOGLOB + diff16 = round_fx_o( L_shl_o( diff, 25, &Overflow ), &Overflow ); +#else + diff16 = round_fx( L_shl( diff, 25 ) ); +#endif + } + test(); + test(); + IF( LT_32( *diff_sm, -1280 ) && LT_32( *energy_sm, 4928 ) && LT_16( diff16, -26214 ) ) + { + *predecision_flag = 1; + move16(); + } + test(); + test(); + if ( GT_32( *diff_sm, 1280 ) && GT_32( *energy_sm, 10624 ) && GT_16( diff16, 16384 ) ) + { + *predecision_flag = 0; + move16(); + } + + /* short pitch possibility pre-decision */ + maximum_fx( EspecdB, 7, &energy0_16 ); + maximum_fx( EspecdB + 8, 7, &energy1_16 ); + ratio = s_max( sub( energy1_16, energy0_16 ), 0 ); /*Q7 */ + /*ratio *= max(voicing,0);*/ + tmp = s_max( voicing_m, 0 ); + ratio = mult_r( ratio, tmp ); /*Q7*/ + /**LF_EnergyRatio_sm = (15*(*LF_EnergyRatio_sm) + 
ratio)/16;*/ + L_tmp = L_mult( ratio, 2048 ); + L_tmp = L_mac( L_tmp, *LF_EnergyRatio_sm, 30720 ); + *LF_EnergyRatio_sm = round_fx( L_tmp ); + move16(); + test(); + if ( GT_16( *LF_EnergyRatio_sm, 4480 ) || GT_16( ratio, 6400 ) ) + { + *predecision_flag = 1; + move16(); + } + + if ( LT_16( *LF_EnergyRatio_sm, 2048 ) ) + { + *predecision_flag = 0; + move16(); + } + + /* short pitch candidate detection */ + Tp = pitch[1]; + move16(); + cor_max = 0; /* NOTE(review): cor_max is declared Word32 but this store is followed by move16() - confirm whether move32() was intended */ + move16(); + pt_wsp = wsp + 3 * L_SUBFR; + pit_min = PIT_MIN_DOUBLEEXTEND; + move16(); + pit_min_up = PIT_MIN; + move16(); + FOR( T = pit_min; T <= pit_min_up; T++ ) + { + energy1 = Dot_product( pt_wsp, pt_wsp - T, L_SUBFR ); + test(); + IF( ( GT_32( energy1, cor_max ) ) || ( EQ_16( T, pit_min ) ) ) /* the first lag is always accepted, so cor_max and Tp come from the searched range even when no product exceeds the initial 0 */ + { + cor_max = L_add( energy1, 0 ); + Tp = T; + move16(); + } + } + energy0 = Dot_product12( pt_wsp, pt_wsp, L_SUBFR, &exp1 ); + exp1 = sub( exp1, shl( Q_new, 1 ) ); + energy1 = Dot_product12( pt_wsp - Tp, pt_wsp - Tp, L_SUBFR, &exp2 ); + exp2 = sub( exp2, shl( Q_new, 1 ) ); + /* cor_max *= inv_sqrt( energy0*energy1 );*/ + L_tmp = Mult_32_32( energy0, energy1 ); + exp = norm_l( L_tmp ); + L_tmp1 = L_shl( L_tmp, exp ); + + exp = sub( sub( 31, exp ), ( sub( sub( 31, exp1 ), exp2 ) ) ); + move16(); + L_tmp1 = Isqrt_lc( L_tmp1, &exp ); /*Q(31-exp)*/ + cor_max = Mult_32_32( cor_max, L_tmp1 ); + exp = add( sub( sub( 31, add( shl( Q_new, 1 ), 1 ) ), sub( 31, exp ) ), 31 ); +#ifdef BASOP_NOGLOB + cor_max16 = round_fx_o( L_shl_o( cor_max, exp, &Overflow ), &Overflow ); /*Q15*/ +#else /* BASOP_NOGLOB */ + cor_max16 = round_fx( L_shl( cor_max, exp ) ); /*Q15*/ +#endif + /**voicing0_sm = add(mult_r(24576 ,(*voicing0_sm)) , mult_r(8192 , cor_max16));*/ + *voicing0_sm = round_fx( L_mac( L_mult( 24576, *voicing0_sm ), 8192, cor_max16 ) ); + move16(); + + /* final short pitch detection */ + test(); + test(); + test(); + *flag_spitch = 0; + move16(); + IF( ( EQ_16( localVAD, 1 ) ) && ( EQ_16( *predecision_flag, 1 ) ) && + ( GT_16( 
*voicing0_sm, 16384 ) ) && ( GT_16( *voicing0_sm, mult_r( *voicing_sm, 21299 ) ) ) ) + { + *flag_spitch = 1; /* all three open-loop lags are replaced by the short-lag candidate Tp below */ + move16(); + pitch[0] = Tp; + move16(); + pitch[1] = Tp; + move16(); + pitch[2] = Tp; + move16(); + } + + return; +} +#endif void StableHighPitchDetect( int16_t *flag_spitch, /* o : flag to indicate very short stable pitch*/ int16_t pitch[], /* i/o: OL pitch buffer */ diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 0e64a12c12b856066aca847e16feb9830864c574..7c58d50d345d6d08e57d2e0ef20a3533c9ce2015 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -199,6 +199,23 @@ Word16 ffr_getSfWord32( const Word32 *vector, /*!< Pointer to i vector */ const Word16 len /*!< Length of i vector */ ); +#ifdef IVAS_FLOAT_FIXED +void find_tilt_ivas_fx( + const Word32 fr_bands[], /* i : energy in frequency bands Q_new*/ + const Word32 bckr[], /* i : per band background noise energy estimate Q_new*/ + Word32 ee[2], /* o : lf/hf E ration for present frame Q6*/ + const Word16 pitch[3], /* i : open loop pitch values for 3 half-frames Q0*/ + const Word16 voicing[3], /* i : normalized correlation for 3 half-frames Q15*/ + const Word32 *lf_E, /* i : per bin energy for low frequencies Q_new - 2*/ + const Word16 corr_shift, /* i : normalized correlation correction Q15*/ + const Word16 bwidth, /* i : i signal bandwidth */ + const Word16 max_band, /* i : maximum critical band */ + Word32 hp_E[], /* o : energy in HF Q_new*/ + const Word16 codec_mode, /* i : MODE1 or MODE2 */ + const Word16 Q_new, /* i : scaling factor */ + Word32 *bckr_tilt_lt, + Word16 Opt_vbr_mode ); +#endif // IVAS_FLOAT_FIXED void find_tilt_fx( const Word32 fr_bands[], /* i : energy in frequency bands Q_new + Q_SCALE*/ @@ -216,6 +233,24 @@ void find_tilt_fx( Word32 *bckr_tilt_lt, Word16 Opt_vbr_mode ); +#ifdef IVAS_FLOAT_FIXED +Word16 find_uv_ivas_fx( /* o : coding type */ + Encoder_State *st_fx, /* i/o: encoder state structure */ + const Word16 *T_op_fr, /* i : pointer to adjusted 
fractional pitch (4 val.) Q6 */ + const Word16 *voicing_fr, /* i : refined correlation for each subframes Q15 */ + const Word16 *speech, /* i : pointer to speech signal for E computation Q_new */ + const Word32 *ee, /* i : lf/hf Energy ratio for present frame Q6 */ + Word32 *dE1X, /* o : sudden energy increase for S/M classifier */ + const Word16 corr_shift, /* i : normalized correlation correction in noise Q15 */ + const Word16 relE, /* i : relative frame energy Q8 */ + const Word16 Etot, /* i : total energy Q8 */ + const Word32 hp_E[], /* i : energy in HF Q_new */ + Word16 *flag_spitch, /* i/o: flag to indicate very short stable pitch and high correlation */ + const Word16 last_core_orig, /* i : original last core */ + STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */ + const Word16 Q_new, + const Word16 q_hp_E ); +#endif // IVAS_FLOAT_FIXED Word16 find_uv_fx( /* o : coding type */ Encoder_State *st_fx, /* i/o: encoder state structure */ const Word16 *T_op_fr, /* i : pointer to adjusted fractional pitch (4 val.) 
Q6*/ @@ -344,6 +379,7 @@ void noise_est_ivas_fx( const Word32 enr[], /* i : averaged energy over both subframes Q_new + Q_SCALE */ Word32 fr_bands[], /* i : spectrum per critical bands of the current frame Q_new + Q_SCALE */ Word16 *cor_map_sum, /* o : Q8 */ + Word16 *ncharX, /* o : Q11 */ Word16 *sp_div, /* o : Q_sp_div */ Word16 *Q_sp_div, /* o : Q factor for sp_div */ Word16 *non_staX, /* o : non-stationarity for sp/mus classifier */ @@ -533,6 +569,24 @@ void StableHighPitchDetect_fx( Word16 Q_new, Word16 EspecdB[] ); +#ifdef IVAS_FLOAT_FIXED +void StableHighPitchDetect_ivas_fx( + Word16 *flag_spitch, /* o : flag to indicate very short stable pitch */ + Word16 pitch[], /* i/o: OL pitch buffer Q0 */ + const Word16 voicing[], /* i : OL pitch gains Q15 */ + const Word16 wsp[], /* i : weighted speech Qx */ + const Word16 localVAD, + Word16 *voicing_sm, /* i/o: smoothed open-loop pitch gains Q15 */ + Word16 *voicing0_sm, /* i/o: smoothed high pitch gains Q15 */ + Word16 *LF_EnergyRatio_sm, /* i/o: smoothed [0, 300Hz] relative peak energy Q7 */ + Word16 *predecision_flag, /* i/o: predecision flag */ + Word32 *diff_sm, /* i/o: smoothed pitch frequency difference Q7 */ + Word32 *energy_sm, /* i/o: smoothed energy around pitch frequency Q7 */ + Word16 Q_new, + Word16 EspecdB[] /* Q7 */ +); +#endif + void swb_bwe_enc_fx( Encoder_State *st_fx, /* i/o: encoder state structure */ Word16 *old_input_12k8_fx, /* i : i signal @12.8kHz for SWB BWE */ @@ -3072,6 +3126,23 @@ Word16 lp_filt_exc_enc_fx( Word16 *lp_flag /* i/o: mode selection Q0 */ ); +Word16 lp_filt_exc_enc_ivas_fx( + const Word16 codec_mode, /* i : MODE1 or MODE2 Q0 */ + const Word16 coder_type, /* i : coding type Q0 */ + const Word16 i_subfr, /* i : subframe index Q0 */ + Word16 *exc, /* i/o: pointer to excitation signal frame Q_new */ + const Word16 *h1, /* i : weighted filter i response Q(14+shift) */ + const Word16 *xn, /* i : target vector Q_new-1+shift */ + Word16 *y1, /* o : zero-memory filtered adaptive 
excitation Q_new-1+shift */ + Word16 *xn2, /* o : target vector for innovation search Q_new-1+shift */ + const Word16 L_subfr, /* i : length of vectors for gain quantization Q0 */ + const Word16 L_frame, /* i : frame size Q0 */ + Word16 *g_corr, /* o : ACELP correlation values mant/exp */ + const Word16 clip_gain, /* i : adaptive gain clipping flag Q0 */ + Word16 *gain_pit, /* o : adaptive excitation gain Q14 */ + Word16 *lp_flag /* i/o: mode selection Q0 */ +); + Word16 inov_encode_fx( Encoder_State *st_fx, /* i/o: encoder state structure */ const Word32 core_brate, /* i : core bitrate */ @@ -3143,6 +3214,27 @@ void gain_enc_mless_fx( Word16 *g_corr, /* i/o: correlations , -2,, -2 and 2 */ const Word16 clip_gain /* i : gain pitch clipping flag (1 = clipping) */ ); + +void gain_enc_mless_ivas_fx( + BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ + const Word16 gains_mode[], /* i : gain bits */ + const Word16 element_mode, /* i : element mode */ + const Word16 L_frame, /* i : length of the frame */ + const Word16 i_subfr, /* i : subframe index */ + const Word16 tc_subfr, /* i : TC subframe index */ + const Word16 *xn, /* i : target vector */ + const Word16 *y1, /* i : zero-memory filtered adaptive excitation */ + const Word16 Q_xn, /* i : xn and y1 scaling */ + const Word16 *y2, /* i : zero-memory filtered algebraic codebook excitation */ + const Word16 *code, /* i : algebraic excitation */ + const Word16 Es_pred, /* i : predicted scaled innovation energy */ + Word16 *gain_pit, /* o : quantized pitch gain */ + Word32 *gain_code, /* o : quantized codebook gain */ + Word16 *gain_inov, /* o : gain of the innovation (used for normalization) */ + Word32 *norm_gain_code, /* o : norm. 
gain of the codebook excitation */ + Word16 *g_corr, /* i/o: correlations <y1,y1>, -2<xn,y1>, <y2,y2>, -2<xn,y2> and 2<y1,y2> */ + const Word16 clip_gain /* i : gain pitch clipping flag (1 = clipping) */ +); void updt_IO_switch_enc_fx( Encoder_State *st, /* i/o: state structure */ const Word16 input_frame /* i : i frame length */ @@ -3205,6 +3297,14 @@ void acelp_core_switch_enc_fx( Word16 shift, Word16 Q_new ); +void acelp_core_switch_enc_ivas_fx( + Encoder_State *st_fx, /* i/o: encoder state structure */ + const Word16 inp12k8[], /* i : input signal @12.8 kHz Q0 */ + const Word16 inp16k[], /* i : input signal @16 kHz Q0 */ + const Word16 A[NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes Q12*/ + Word16 shift, + Word16 Q_new ); + void gain_enc_amr_wb_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 *xn, /* i : target vector */ @@ -3963,7 +4063,22 @@ Word16 multi_harm_fx( /* o : frame multi-harmonicity Word16 *sp_floor, /* o: noise floor estimate Q7 */ Word16 S_map[] /* o : short-term correlation map Q7 */ ); - +#ifdef IVAS_FLOAT_FIXED +Word16 multi_harm_ivas_fx( /* o : frame multi-harmonicity (1-harmonic, 0-not) */ + const Word16 Bin_E[], /* i : log-energy spectrum of the current frame Q7 */ + Word16 old_S[], /* i/o: prev. log-energy spectrum w. 
subtracted floor Q7 */ + Word16 cor_map_LT[], /* i/o: LT correlation map Q15 */ + Word16 *multi_harm_limit, /* i/o: multi-harmonic threshold Q9 */ + const Word32 total_brate, /* i : total bitrate Q0 */ + const Word16 bwidth, /* i : input signal bandwidth Q0 */ + Word16 *cor_strong_limit, /* i/o: HF correlation indicator Q0 */ + Word16 *st_mean_avr_dyn, /* i/o: long term average dynamic Q7 */ + Word16 *st_last_sw_dyn, /* i/o: last dynamic Q7 */ + Word16 *cor_map_sum, /* i : sum of correlation map Q8 */ + Word16 *sp_floor, /* o: noise floor estimate Q7 */ + Word16 S_map[] /* o : short-term correlation map Q7 */ +); +#endif void pvq_encode_frame_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 *coefs_norm, /* i : normalized coefficients to encode */ @@ -4207,4 +4322,22 @@ Word16 cng_energy_ivas_fx( const Word16 Q_new /* i : Input scaling */ ); +void generate_comfort_noise_enc_ivas_fx( Encoder_State *stcod, + Word16 Q_new, + Word16 gen_exc ); + +void SynthesisSTFT_enc_ivas_fx( + Word32 *fftBuffer, /* i : pointer to FFT bins */ + Word16 fftBufferExp, /* i : exponent of FFT bins */ + Word16 *timeDomainOutput, /* o : pointer to time domain signal */ + Word16 *olapBuffer, /* i/o : pointer to overlap buffer */ + const PWord16 *olapWin, /* i : pointer to overlap window */ + Word16 tcx_transition, + HANDLE_FD_CNG_COM hFdCngCom, /* i/o : pointer to FD_CNG structure containing all buffers and variables */ + Word16 gen_exc, + Word16 *Q_new, /* i : Q of generated exc_cng */ + const Word16 element_mode, /* i : element mode */ + const Word16 nchan_out /* i : number of output channels */ +); + #endif