From d6ac820cc2cacfe4cdb3eed1adb141734d0e1ee5 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Sat, 21 Sep 2024 12:23:28 +0530 Subject: [PATCH 1/2] FdCngEncodeMDCTStereoSID subfunctions, hq_core_enc function and sub-functions converted to fxd --- lib_com/prot_fx.h | 9 + lib_enc/core_enc_init.c | 27 ++- lib_enc/evs_enc.c | 175 +++++++++++++++++ lib_enc/fd_cng_enc.c | 154 ++++++++++++--- lib_enc/hq_core_enc.c | 414 +++++++++++++++++++++++++++++++++++----- lib_enc/ivas_core_enc.c | 163 ++++++++++++++++ lib_enc/prot_fx_enc.h | 9 - lib_enc/stat_enc.h | 1 + 8 files changed, 868 insertions(+), 84 deletions(-) diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 7654c22a8..d1892c0b1 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -7454,6 +7454,15 @@ void fd_bwe_dec_init( FD_BWE_DEC_HANDLE hBWE_FD /* i/o: FD BWE data handle */ ); +void hq_core_enc_ivas_fx( + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 *audio_fx, /* i : input audio signal Q0 */ + const Word16 input_frame, /* i : frame length */ + const Word16 hq_core_type, /* i : HQ core type */ + const Word16 Voicing_flag, /* i : Voicing flag for FER method selection */ + const Word16 vad_hover_flag /* i : VAD hangover flag */ +); + // hq_core_dec_fx.c void hq_core_dec_fx( Decoder_State *st_fx, /* i/o: decoder state structure fx */ diff --git a/lib_enc/core_enc_init.c b/lib_enc/core_enc_init.c index 168c336b7..2111560a8 100644 --- a/lib_enc/core_enc_init.c +++ b/lib_enc/core_enc_init.c @@ -44,6 +44,10 @@ #include "prot_fx.h" #include "prot_fx_enc.h" +#ifdef IVAS_FLOAT_FIXED +#include "prot_fx.h" +#endif + /*-----------------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------------*/ @@ -440,6 +444,11 @@ void init_coder_ace_plus( set_zero( hTcxEnc->Txnq_flt, L_FRAME32k / 2 + 64 ); hTcxEnc->acelp_zir_flt = hTcxEnc->Txnq_flt + L_FRAME / 2; hTcxEnc->tcx_target_bits_fac_flt = 1.0f; + 
+#ifdef IVAS_FLOAT_FIXED + set16_fx( hTcxEnc->Txnq, 0, L_FRAME32k / 2 + 64 ); + hTcxEnc->acelp_zir = hTcxEnc->Txnq + L_FRAME / 2; +#endif } } @@ -1061,8 +1070,13 @@ static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_ol if ( st->hTcxEnc != NULL ) { - hTcxEnc->new_speech_TCX = st->input_buff_fx + L_FRAME48k + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) - NS2SA( st->input_Fs, DELAY_FIR_RESAMPL_NS ); - hTcxEnc->speech_TCX = hTcxEnc->new_speech_TCX - st->encoderLookahead_FB; + st->hTcxEnc->new_speech_TCX_flt = st->input_buff + st->input_Fs / FRAMES_PER_SEC; /* note: in EVS st->new_speech_TCX == st->input - 0.9375ms; in IVAS st->new_speech_TCX == st->input */ + st->hTcxEnc->speech_TCX_flt = st->hTcxEnc->new_speech_TCX_flt - st->encoderLookahead_FB; + +#ifdef IVAS_FLOAT_FIXED + st->hTcxEnc->new_speech_TCX = st->input_buff_fx + Mpy_32_32( st->input_Fs, ONE_BY_FRAMES_PER_SEC_Q31 ); + st->hTcxEnc->speech_TCX = st->hTcxEnc->new_speech_TCX - st->encoderLookahead_FB; +#endif } st->speech_enc = st->buf_speech_enc + st->encoderPastSamples_enc; st->speech_enc_pe = st->buf_speech_enc_pe + st->encoderPastSamples_enc; @@ -1227,6 +1241,11 @@ static void init_acelp( { set_zero( st->hTcxEnc->Txnq_flt, L_FRAME32k / 2 + 64 ); st->hTcxEnc->acelp_zir_flt = st->hTcxEnc->Txnq_flt + ( st->L_frame / 2 ); + +#ifdef IVAS_FLOAT_FIXED + set16_fx( st->hTcxEnc->Txnq, 0, L_FRAME32k / 2 + 64 ); + st->hTcxEnc->acelp_zir = st->hTcxEnc->Txnq + ( st->L_frame / 2 ); +#endif } } else /*Rate switching*/ @@ -1242,6 +1261,10 @@ static void init_acelp( lerp_flt( st->hTcxEnc->Txnq_flt, st->hTcxEnc->Txnq_flt, st->hTcxCfg->tcx_mdct_window_length, st->hTcxCfg->tcx_mdct_window_length_old ); } st->hTcxEnc->acelp_zir_flt = st->hTcxEnc->Txnq_flt + ( st->L_frame / 2 ); + +#ifdef IVAS_FLOAT_FIXED + st->hTcxEnc->acelp_zir = st->hTcxEnc->Txnq + ( st->L_frame / 2 ); +#endif } /* Rate switching */ diff --git a/lib_enc/evs_enc.c b/lib_enc/evs_enc.c index dadacb408..487f00555 100644 --- a/lib_enc/evs_enc.c 
+++ b/lib_enc/evs_enc.c @@ -42,6 +42,8 @@ #include "wmc_auto.h" #ifdef IVAS_FLOAT_FIXED #include "ivas_prot_fx.h" +#include "prot_fx.h" +#include "prot_fx_enc.h" #endif /*-------------------------------------------------------------------* @@ -248,7 +250,180 @@ ivas_error evs_enc( if ( st->core == HQ_CORE ) { +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + PWord16 *p; + Word16 q = 0; + Word16 overlap, tmp; + Word16 left_overlap, right_overlap; + IF( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE ) ) + { + left_overlap = st->hTcxCfg->tcx_mdct_window_lengthFB; + right_overlap = st->hTcxCfg->tcx_mdct_window_delayFB; + Word16 L_frame1 = 640; + + p = st->hTcxCfg->tcx_mdct_window_minimumFB; + tmp = shr( st->hTcxCfg->tcx_mdct_window_min_lengthFB, 1 ); + + Word16 q1 = Q_factor_arr( st->hTcxCfg->tcx_mdct_window_minimumFB_flt, tmp ); + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_minimumFB_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_minimumFB_flt[i + tmp], q1 ); + } + + p = st->hTcxCfg->tcx_mdct_window_transFB; + tmp = shr( st->hTcxCfg->tcx_mdct_window_min_lengthFB, 1 ); + + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_transFB_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_transFB_flt[i + tmp], q1 ); + } + + p = st->hTcxCfg->tcx_aldo_window_2_FB; + tmp = shr( st->hTcxCfg->tcx_mdct_window_delayFB, 1 ); + + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_aldo_window_2_FB_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_aldo_window_2_FB_flt[i + tmp], q1 ); + } + + tmp = st->hTcxCfg->tcx_mdct_window_min_length / 2; + p = st->hTcxCfg->tcx_mdct_window_minimum; + FOR( i = 0; i < tmp; i++ ) + { + 
p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_minimum_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_minimum_flt[i + tmp], q1 ); + } + + + Word16 l1 = ( shr( left_overlap, 1 ) * 2 ) + sub( L_frame1, shr( add( left_overlap, right_overlap ), 1 ) ) + ( shr( right_overlap, 1 ) * 2 ) - shr( left_overlap, 1 ) + st->hTcxCfg->tcx_offsetFB; + q = Q_factor_arr( st->hTcxEnc->speech_TCX_flt, l1 ) - 1; + st->hTcxEnc->q_speech_TCX = q; + + Word16 offset, overlap1; + offset = negate( shr( st->hTcxCfg->tcx_mdct_window_trans_lengthFB, 1 ) ); + overlap1 = st->hTcxCfg->tcx_mdct_window_trans_lengthFB; + + floatToFixed_arr( st->hTcxEnc->speech_TCX_flt - overlap1 / 2 + offset, st->hTcxEnc->speech_TCX - overlap1 / 2 + offset, q, l1 ); + floatToFixed_arr( st->hTcxEnc->speech_TCX_flt - overlap1 / 2 + offset, st->hTcxEnc->speech_TCX - overlap1 / 2 + offset, q, 2 * L_FRAME48k + 960 ); + + q1 = 15; + tmp = st->hTcxCfg->tcx_mdct_window_half_length / 2; + p = st->hTcxCfg->tcx_mdct_window_half; + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_half_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_half_flt[i + tmp], q1 ); + } + + tmp = st->hTcxCfg->tcx_mdct_window_min_length / 2; + p = st->hTcxCfg->tcx_mdct_window_trans; + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_trans_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_trans_flt[i + tmp], q1 ); + } + + overlap = st->hTcxCfg->tcx_mdct_window_length; + tmp = overlap / 2; + p = st->hTcxCfg->tcx_aldo_window_2; + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_aldo_window_2_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_aldo_window_2_flt[i + tmp], q1 ); 
+ } + + tmp = overlap / 2; + p = st->hTcxCfg->tcx_aldo_window_1_trunc; + FOR( i = -NS2SA( st->sr_core, N_ZERO_MDCT_NS ); i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_aldo_window_1_trunc_flt[i], q1 ); + } + FOR( i = -NS2SA( st->sr_core, N_ZERO_MDCT_NS ); i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_aldo_window_1_trunc_flt[i + tmp], q1 ); + } + + FOR( i = 0; i < L_FRAME32k / 2 + 64; i++ ) + { + st->hTcxEnc->Txnq[i] = float_to_fix16( st->hTcxEnc->Txnq_flt[i], 0 ); + } + } + ELSE + { + FOR( i = 0; i < L_FRAME32k; i++ ) + { + st->hTcxEnc->old_out_fx[i] = float_to_fix16( st->hTcxEnc->old_out[i], 0 ); + } + } + + IF( NE_16( hq_core_type, LOW_RATE_HQ_CORE ) ) + { + IF( NE_16( st->element_mode, EVS_MONO ) ) + { + Word16 temp_e; + + f2me( st->hHQ_core->crest_lp, &st->hHQ_core->crest_lp_fx, &temp_e ); + st->hHQ_core->crest_lp_q = sub( Q31, temp_e ); + f2me( st->hHQ_core->crest_mod_lp, &st->hHQ_core->crest_mod_lp_fx, &temp_e ); + st->hHQ_core->crest_mod_lp_q = sub( Q31, temp_e ); + } + } + + floatToFixed_arr( st->input - delay, st->input_fx - delay, 0, 960 ); +#endif + hq_core_enc_ivas_fx( st, st->input_fx - delay, input_frame, hq_core_type, Voicing_flag, vad_hover_flag ); + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + IF( EQ_16( hq_core_type, LOW_RATE_HQ_CORE ) ) + { + fixedToFloat_arrL( st->hHQ_core->last_ni_gain_fx, st->hHQ_core->last_ni_gain, Q17, BANDS_MAX ); + fixedToFloat_arr( st->hHQ_core->last_env_fx, st->hHQ_core->last_env, Q1, BANDS_MAX ); + } + ELSE + { + IF( NE_16( st->element_mode, EVS_MONO ) ) + { + + st->hHQ_core->crest_lp = fix_to_float( st->hHQ_core->crest_lp_fx, st->hHQ_core->crest_lp_q ); + st->hHQ_core->crest_mod_lp = fix_to_float( st->hHQ_core->crest_mod_lp_fx, st->hHQ_core->crest_mod_lp_q ); + } + } + IF( st->element_mode > EVS_MONO ) + { + FOR( i = 0; i < L_FRAME32k; i++ ) + { + st->hLPDmem->old_exc_flt[i] = fix16_to_float( st->hLPDmem->old_exc[i], 0 ); + } + } +#endif +#else hq_core_enc( st, st->input - 
delay, input_frame, hq_core_type, Voicing_flag, vad_hover_flag ); +#endif } /*---------------------------------------------------------------------* diff --git a/lib_enc/fd_cng_enc.c b/lib_enc/fd_cng_enc.c index bf0cc2136..514a5ffbd 100644 --- a/lib_enc/fd_cng_enc.c +++ b/lib_enc/fd_cng_enc.c @@ -1180,10 +1180,19 @@ void FdCngEncodeMDCTStereoSID( int16_t gain_idx[CPE_CHANNELS]; int16_t N, stages, ch, p, coh_idx; float side_energy; +#ifdef IVAS_FLOAT_FIXED + Word32 *lr_out_ptr_fx[CPE_CHANNELS]; + Word32 gain_fx[CPE_CHANNELS]; + Word32 side_energy_fx; + Word32 *invTrfMatrix_fx; + Word32 tmpRAM_fx[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; /*24*18*/ + invTrfMatrix_fx = (Word32 *) tmpRAM_fx; /* dynamically filled */ +#endif int16_t no_side_flag; int16_t is_inp_ms; - float tot_sig_ext[FDCNG_VQ_MAX_LEN], dct_target[CPE_CHANNELS][FDCNG_VQ_DCT_MAXTRUNC]; /* 24 +2*18*/ + float tot_sig_ext[FDCNG_VQ_MAX_LEN] /*, dct_target[CPE_CHANNELS][FDCNG_VQ_DCT_MAXTRUNC]*/; /* 24 +2*18*/ + Word32 tot_sig_ext_fx[FDCNG_VQ_MAX_LEN], dct_target_fx[CPE_CHANNELS][FDCNG_VQ_DCT_MAXTRUNC]; /* 24 +2*18*/ float *invTrfMatrix; float tmpRAM[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; /*24*18*/ invTrfMatrix = (float *) tmpRAM; /* dynamically filled */ @@ -1202,6 +1211,9 @@ void FdCngEncodeMDCTStereoSID( lr_in_ptr[ch] = &sts[ch]->hFdCngEnc->msNoiseEst[0]; ms_ptr[ch] = &logNoiseEst[ch][0]; lr_out_ptr[ch] = &sts[ch]->hFdCngEnc->hFdCngCom->sidNoiseEst_flt[0]; +#ifdef IVAS_FLOAT_FIXED + lr_out_ptr_fx[ch] = &sts[ch]->hFdCngEnc->hFdCngCom->sidNoiseEst[0]; +#endif } N = sts[0]->hFdCngEnc->npartDec; set_f( weights, 1.f, NPART ); @@ -1216,17 +1228,19 @@ void FdCngEncodeMDCTStereoSID( E[ch] += ms_ptr[ch][p]; } } - +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word32 ms_ptr_fx[2][NPART]; + Word16 q = s_min( Q_factor_arrL( ms_ptr[0], N ), Q_factor_arrL( ms_ptr[1], N ) ) - 1; + floatToFixed_arrL( ms_ptr[0], ms_ptr_fx[0], q, N ); + floatToFixed_arrL( ms_ptr[1], ms_ptr_fx[1], q, N ); +#endif /* M/S transform on log envelopes 
*/ if ( is_inp_ms == 0 ) { #ifndef IVAS_FLOAT_FIXED convertToMS( N, ms_ptr[0], ms_ptr[1], 0.5f ); #else - Word32 ms_ptr_fx[2][NPART]; - Word16 q = s_min( Q_factor_arrL( ms_ptr[0], N ), Q_factor_arrL( ms_ptr[1], N ) ) - 1; - floatToFixed_arrL( ms_ptr[0], ms_ptr_fx[0], q, N ); - floatToFixed_arrL( ms_ptr[1], ms_ptr_fx[1], q, N ); + convertToMS_fx( N, ms_ptr_fx[0], ms_ptr_fx[1], ONE_IN_Q30 ); @@ -1234,8 +1248,15 @@ void FdCngEncodeMDCTStereoSID( fixedToFloat_arrL( ms_ptr_fx[1], ms_ptr[1], q, N ); #endif } - +#ifndef IVAS_FLOAT_FIXED side_energy = sum2_f( ms_ptr[1], N ); +#else + Word16 gb = find_guarded_bits_fx( N ); + side_energy_fx = sum2_f_32_fx( ms_ptr_fx[1], N, gb ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + side_energy = fixedToFloat( side_energy_fx, 2 * q - 31 - gb ); +#endif +#endif /* do not transmit side shape if initial noise shapes are very similar */ if ( side_energy <= 0.1f ) @@ -1269,27 +1290,68 @@ void FdCngEncodeMDCTStereoSID( /* High quality cosine smooth basis extension used to not introduce noise in stage#1 DCT24 analysis and subsequent VQ-steps */ if ( N == FDCNG_VQ_MAX_LEN_WB ) { - create_IDCT_N_Matrix( invTrfMatrix, N, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); /*WB: create truncated IDCT21 matrix */ +#ifdef IVAS_FLOAT_FIXED + Word16 size_value, temp_e; + size_value = BASOP_Util_Divide1616_Scale( sizeof( tmpRAM_fx ), ( sizeof( Word32 ) ), &temp_e ); /*Q15*/ + size_value = shr( size_value, sub( 15, temp_e ) ); + create_IDCT_N_Matrix_fx( invTrfMatrix_fx, N, FDCNG_VQ_DCT_MAXTRUNC, size_value ); // Q31 /*WB: create truncated IDCT21 matrix */ +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + for ( int i = 0; i < FDCNG_VQ_MAX_LEN; i++ ) + { + fixedToFloat_arrL( tmpRAM_fx[i], tmpRAM[i], Q31, FDCNG_VQ_DCT_MAXTRUNC ); + } + q = s_min( Q_factor_arrL( ms_ptr[0], N ), Q_factor_arrL( ms_ptr[1], N ) ) - 1; + floatToFixed_arrL( ms_ptr[0], ms_ptr_fx[0], q, N ); + floatToFixed_arrL( ms_ptr[1], ms_ptr_fx[1], q, N ); +#endif +#else + create_IDCT_N_Matrix( 
invTrfMatrix, N, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); /*WB: create truncated IDCT21 matrix */ +#endif for ( ch = 0; ch < CPE_CHANNELS; ch++ ) { /* run DCT_N N==21 , truncated at 18/21 ~= 86% , i.e use a bit better better quality in extrapolation , than subsequent DCT24 analysis which is truncated at 75%*/ +#ifdef IVAS_FLOAT_FIXED + /* truncated DCT 21 analysis */ + dctT2_N_apply_matrix_fx( (const Word32 *) ms_ptr_fx[ch], dct_target_fx[ch], FDCNG_VQ_DCT_MAXTRUNC, N, invTrfMatrix_fx, FDCNG_VQ_DCT_MAXTRUNC, DCT_T2_21_XX ); + /* extrapolate extend fdcng envelope signal in the fdncg ienvelope/"time" domain using DCT21 basis vectors, + estimated DCT21 coeffs scaling extended basis vectors are used to create extrapolated length 24 input target envelope signal */ + /* this DCT21 extension does not introduce DCT24 coefficient noise for the subsequent dct24 target analysis, and later in IDCT24 synthesis */ + /* truncated IDCT 21 extension synthesis */ + extend_dctN_input_fx( ms_ptr_fx[ch], dct_target_fx[ch], N, tot_sig_ext_fx, FDCNG_VQ_MAX_LEN, invTrfMatrix_fx /* DCT_N basis vectors */, FDCNG_VQ_DCT_MAXTRUNC, IDCT_T2_XX_21 ); /* use 18 basis vectors*/ +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + fixedToFloat_arrL( tot_sig_ext_fx, tot_sig_ext, q, FDCNG_VQ_MAX_LEN ); +#endif +#else /* truncated DCT 21 analysis */ dctT2_N_apply_matrix( (const float *) ms_ptr[ch], dct_target[ch], FDCNG_VQ_DCT_MAXTRUNC, N, invTrfMatrix, FDCNG_VQ_DCT_MAXTRUNC, DCT_T2_21_XX ); - /* extrapolate extend fdcng envelope signal in the fdncg ienvelope/"time" domain using DCT21 basis vectors, estimated DCT21 coeffs scaling extended basis vectors are used to create extrapolated length 24 input target envelope signal */ /* this DCT21 extension does not introduce DCT24 coefficient noise for the subsequent dct24 target analysis, and later in IDCT24 synthesis */ /* truncated IDCT 21 extension synthesis */ extend_dctN_input( ms_ptr[ch], dct_target[ch], N, tot_sig_ext, FDCNG_VQ_MAX_LEN, 
invTrfMatrix /* DCT_N basis vectors */, FDCNG_VQ_DCT_MAXTRUNC, IDCT_T2_XX_21 ); /* use 18 basis vectors*/ +#endif + mvr2r( tot_sig_ext, ms_ptr[ch], FDCNG_VQ_MAX_LEN ); /* write extended result as input to VQ */ } } - create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); /*always create/set up IDCT24 matrix in RAM */ - +#ifdef IVAS_FLOAT_FIXED + Word16 size_value, temp_e; + size_value = BASOP_Util_Divide1616_Scale( sizeof( tmpRAM_fx ), ( sizeof( Word32 ) ), &temp_e ); /*Q15*/ + size_value = shr( size_value, sub( 15, temp_e ) ); + create_IDCT_N_Matrix_fx( invTrfMatrix_fx, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, size_value ); /*always create/set up IDCT24 matrix in RAM */ +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + for ( int i = 0; i < FDCNG_VQ_MAX_LEN; i++ ) + { + fixedToFloat_arrL( tmpRAM_fx[i], tmpRAM[i], Q31, FDCNG_VQ_DCT_MAXTRUNC ); + } +#endif +#else + create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); /*always create/set up IDCT24 matrix in RAM */ +#endif /* end split */ for ( ch = 0; ch < CPE_CHANNELS; ch++ ) { @@ -1310,19 +1372,17 @@ void FdCngEncodeMDCTStereoSID( { #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - Word32 ms_ptr_fx[24]; + Word16 ms_ptr_e; - Word32 *invTrfMatrix_fx; - Word32 tmpRAM_fx[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; Word16 weights_fx[24]; invTrfMatrix_fx = (Word32 *) tmpRAM_fx; - f2me_buf( ms_ptr[ch], ms_ptr_fx, &ms_ptr_e, N ); + f2me_buf( ms_ptr[ch], ms_ptr_fx[ch], &ms_ptr_e, N ); floatToFixed_arrL( invTrfMatrix, invTrfMatrix_fx, Q31, FDCNG_VQ_MAX_LEN * FDCNG_VQ_DCT_MAXTRUNC ); floatToFixed_arr( weights, weights_fx, Q8, 24 ); #endif - msvq_enc_ivas_fx( ivas_cdk_37bits_fx, Q7, NULL, NULL, ms_ptr_fx, ms_ptr_e, levels_37bits, FD_CNG_maxC_37bits, stages, weights_fx, N, FD_CNG_maxN_37bits, 1, invTrfMatrix_fx, indices[ch] ); + msvq_enc_ivas_fx( ivas_cdk_37bits_fx, Q7, NULL, NULL, ms_ptr_fx[ch], 
ms_ptr_e, levels_37bits, FD_CNG_maxC_37bits, stages, weights_fx, N, FD_CNG_maxN_37bits, 1, invTrfMatrix_fx, indices[ch] ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arrL( invTrfMatrix_fx, invTrfMatrix, Q31, FDCNG_VQ_MAX_LEN * FDCNG_VQ_DCT_MAXTRUNC ); #endif @@ -1344,15 +1404,17 @@ void FdCngEncodeMDCTStereoSID( #ifndef IVAS_FLOAT_FIXED convertToMS( N, ms_ptr[0], ms_ptr[1], 1.0f ); #else - Word32 ms_ptr_fx[2][NPART]; - Word16 q = s_min( Q_factor_arrL( ms_ptr[0], N ), Q_factor_arrL( ms_ptr[1], N ) ) - 1; +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + // Word32 ms_ptr_fx[2][NPART]; + q = s_min( Q_factor_arrL( ms_ptr[0], N ), Q_factor_arrL( ms_ptr[1], N ) ) - 1; floatToFixed_arrL( ms_ptr[0], ms_ptr_fx[0], q, N ); floatToFixed_arrL( ms_ptr[1], ms_ptr_fx[1], q, N ); - +#endif convertToMS_fx( N, ms_ptr_fx[0], ms_ptr_fx[1], ONE_IN_Q31 ); - +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arrL( ms_ptr_fx[0], ms_ptr[0], q, N ); fixedToFloat_arrL( ms_ptr_fx[1], ms_ptr[1], q, N ); +#endif #endif } @@ -1365,8 +1427,18 @@ void FdCngEncodeMDCTStereoSID( gain[ch] += ms_ptr[ch][p]; } gain[ch] = ( E[ch] - gain[ch] ) / (float) N; +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + gain_fx[ch] = floatToFixed_32( gain[ch], Q23 ); +#endif + apply_scale( &gain_fx[ch], sts[ch]->hFdCngEnc->hFdCngCom->CngBandwidth, sts[ch]->element_brate, scaleTableStereo, SIZE_SCALE_TABLE_STEREO ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + gain[ch] = fixedToFloat_32( gain_fx[ch], Q23 ); +#endif +#else apply_scale_flt( &gain[ch], sts[ch]->hFdCngEnc->hFdCngCom->CngBandwidth, sts[ch]->element_brate, scaleTableStereo, SIZE_SCALE_TABLE_STEREO ); +#endif /* quantize gain */ gain_idx[ch] = (int16_t) floor( gain[ch] * 1.5f + GAIN_Q_OFFSET_IVAS + .5f ); gain_idx[ch] = max( 0, min( 127, gain_idx[ch] ) ); @@ -1385,11 +1457,27 @@ void FdCngEncodeMDCTStereoSID( lr_out_ptr[ch][p] = powf( 10.f, ( ms_ptr[ch][p] + gain[ch] ) / 10.f ); } +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + q = 
Q_factor_arrL( lr_out_ptr[ch], N ); + floatToFixed_arrL( lr_out_ptr[ch], lr_out_ptr_fx[ch], q, N ); +#endif + /* scale bands and get scalefactors */ + scalebands( lr_out_ptr_fx[ch], hFdCngEnc->partDec, N, hFdCngEnc->midbandDec, hFdCngEnc->nFFTpartDec,sub( hFdCngEnc->stopBandDec , hFdCngEnc->startBandDec), hFdCngCom->cngNoiseLevel, 1 ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + fixedToFloat_arrL( hFdCngCom->cngNoiseLevel, hFdCngCom->cngNoiseLevel_flt, q, FFTCLDFBLEN ); +#endif + lpc_from_spectrum( hFdCngCom, hFdCngEnc->startBandDec, hFdCngEnc->stopFFTbinDec, sts[ch]->preemph_fac ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + sts[ch]->preemph_fac_flt = fixedToFloat( sts[ch]->preemph_fac, Q15 ); +#endif +#else /* scale bands and get scalefactors */ scalebands_flt( lr_out_ptr[ch], hFdCngEnc->partDec, N, hFdCngEnc->midbandDec, hFdCngEnc->nFFTpartDec, hFdCngEnc->stopBandDec - hFdCngEnc->startBandDec, hFdCngCom->cngNoiseLevel_flt, 1 ); - lpc_from_spectrum_flt( hFdCngCom, hFdCngEnc->startBandDec, hFdCngEnc->stopFFTbinDec, sts[ch]->preemph_fac_flt ); +#endif + sts[ch]->hDtxEnc->last_CNG_L_frame = sts[ch]->L_frame; } @@ -1451,6 +1539,9 @@ void FdCngEncodeDiracMDCTStereoSID( float *lr_in_ptr[CPE_CHANNELS]; float *ms_ptr[CPE_CHANNELS]; float *lr_out_ptr[CPE_CHANNELS]; +#ifdef IVAS_FLOAT_FIXED + Word32 *lr_out_ptr_fx[CPE_CHANNELS]; +#endif float logNoiseEst[CPE_CHANNELS][NPART]; float E[CPE_CHANNELS]; float gain[CPE_CHANNELS]; @@ -1473,6 +1564,9 @@ void FdCngEncodeDiracMDCTStereoSID( lr_in_ptr[ch] = &sts[ch]->hFdCngEnc->msNoiseEst[0]; ms_ptr[ch] = &logNoiseEst[ch][0]; lr_out_ptr[ch] = &sts[ch]->hFdCngEnc->hFdCngCom->sidNoiseEst_flt[0]; +#ifdef IVAS_FLOAT_FIXED + lr_out_ptr_fx[ch] = &sts[ch]->hFdCngEnc->hFdCngCom->sidNoiseEst[0]; +#endif } set_f( weights, 1.f, NPART ); @@ -1602,12 +1696,26 @@ void FdCngEncodeDiracMDCTStereoSID( { lr_out_ptr[ch][N[ch] - 1] *= SWB_13k2_LAST_BAND_SCALE_FLT; } - +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + q = Q_factor_arrL( 
lr_out_ptr[ch], N[ch] ); + floatToFixed_arrL( lr_out_ptr[ch], lr_out_ptr_fx[ch], q, N[ch] ); +#endif + /* scale bands and get scalefactors */ + scalebands( lr_out_ptr_fx[ch], hFdCngEnc->partDec, N[ch], hFdCngEnc->midbandDec, hFdCngEnc->nFFTpartDec, sub(hFdCngEnc->stopBandDec , hFdCngEnc->startBandDec), hFdCngCom->cngNoiseLevel, 1 ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + fixedToFloat_arrL( hFdCngCom->cngNoiseLevel, hFdCngCom->cngNoiseLevel_flt, q, FFTCLDFBLEN ); +#endif + lpc_from_spectrum( hFdCngCom, hFdCngEnc->startBandDec, hFdCngEnc->stopFFTbinDec, sts[ch]->preemph_fac ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + sts[ch]->preemph_fac_flt = fixedToFloat( sts[ch]->preemph_fac, Q15 ); +#endif +#else /* scale bands and get scalefactors */ scalebands_flt( lr_out_ptr[ch], hFdCngEnc->partDec, N[ch], hFdCngEnc->midbandDec, hFdCngEnc->nFFTpartDec, hFdCngEnc->stopBandDec - hFdCngEnc->startBandDec, hFdCngCom->cngNoiseLevel_flt, 1 ); lpc_from_spectrum_flt( hFdCngCom, hFdCngEnc->startBandDec, hFdCngEnc->stopFFTbinDec, sts[ch]->preemph_fac_flt ); - +#endif sts[ch]->hDtxEnc->last_CNG_L_frame = sts[ch]->L_frame; } sts[0]->hFdCngEnc->hFdCngCom->coherence_flt = 0.0f; diff --git a/lib_enc/hq_core_enc.c b/lib_enc/hq_core_enc.c index 0caa1ea0d..e805cf4e0 100644 --- a/lib_enc/hq_core_enc.c +++ b/lib_enc/hq_core_enc.c @@ -53,6 +53,369 @@ * HQ core encoder *--------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +void hq_core_enc_ivas_fx( + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 *audio_fx, /* i : input audio signal Q0 */ + const Word16 input_frame_orig, /* i : frame length */ + const Word16 hq_core_type, /* i : HQ core type */ + const Word16 Voicing_flag, /* i : Voicing flag for FER method selection */ + const Word16 vad_hover_flag /* i : VAD hangover flag */ +) +{ + Word16 i, is_transient, num_bits, extra_unused; + + Word32 t_audio_fx[L_FRAME48k_EXT]; + Word16 wtda_audio_fx16[2 * L_FRAME48k]; + Word32 
wtda_audio_fx32[2 * L_FRAME48k]; + Word16 two_frames_buffer[2 * L_FRAME48k]; + Word16 tmp; + Word16 Aq_old_fx[M + 1]; + Word16 output_fx[L_FRAME16k]; + Word16 Q_audio; + Word16 out_q = 0, old_q = 0; + move16(); + move16(); + Word16 inner_frame, input_frame, L_frame; + Word16 L_spec, overlap, nz, tcx_offset; + Word16 left_overlap, right_overlap; + BSTR_ENC_HANDLE hBstr = st->hBstr; + Word16 q = 0; + Word16 exp; + move16(); + + push_wmops( "hq_core_enc" ); + + set16_fx( wtda_audio_fx16, 0, 2 * L_FRAME48k ); + set32_fx( wtda_audio_fx32, 0, 2 * L_FRAME48k ); + st->Nb_ACELP_frames = 0; + move16(); + set_zero_fx( t_audio_fx, L_FRAME48k ); + /* set input_frame length */ + input_frame = input_frame_orig; + + /* Sanity check, it should never happen at the encoder side (no BFI) */ + IF( st->hTcxCfg->tcx_curr_overlap_mode == FULL_OVERLAP ) + { + st->hTcxCfg->tcx_last_overlap_mode = ALDO_WINDOW; + } + ELSE + { + st->hTcxCfg->tcx_last_overlap_mode = st->hTcxCfg->tcx_curr_overlap_mode; + } + move16(); + st->hTcxCfg->tcx_curr_overlap_mode = ALDO_WINDOW; + move16(); + + /*-------------------------------------------------------------------------- + * Preprocessing in the first HQ frame after ACELP frame + * Find the number of bits for PVQ coding + * Write signaling information + *--------------------------------------------------------------------------*/ + + num_bits = BASOP_Util_Divide3232_Scale( st->total_brate, FRAMES_PER_SEC, &exp ); + num_bits = shr( num_bits, sub( 15, exp ) ); + extra_unused = 0; + move16(); + + /*-------------------------------------------------------------------------- + * Detect signal transition + *--------------------------------------------------------------------------*/ + + is_transient = detect_transient_fx( audio_fx, input_frame, 0, st ); + move16(); + + test(); + test(); + IF( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || EQ_16( st->last_core, AMR_WB_CORE ) ) ) + { + 
/*-------------------------------------------------------------------------- + * IVAS switching frame + *--------------------------------------------------------------------------*/ + + L_spec = input_frame; + left_overlap = -1; + right_overlap = -1; + move16(); + move16(); + move16(); + + WindowSignal( st->hTcxCfg, st->hTcxCfg->tcx_offsetFB, TRANSITION_OVERLAP, FULL_OVERLAP, &left_overlap, &right_overlap, st->hTcxEnc->speech_TCX, &L_spec, wtda_audio_fx16, 1, 1 ); + + q = st->hTcxEnc->q_speech_TCX; + move16(); + Q_audio = sub( Q16, q ); + TCX_MDCT( wtda_audio_fx16, t_audio_fx, &Q_audio, left_overlap, sub( L_spec, shr( add( left_overlap, right_overlap ), 1 ) ), right_overlap, st->element_mode ); + Q_audio = sub( Q31, Q_audio ); + Copy_Scale_sig_16_32( wtda_audio_fx16, wtda_audio_fx32, 2 * L_FRAME48k, sub( Q_audio, q ) ); + + inner_frame = inner_frame_tbl[st->bwidth]; + L_spec = l_spec_ext_tbl[st->bwidth]; + is_transient = 0; + move16(); + move16(); + move16(); + } + ELSE + { + /*-------------------------------------------------------------------------- + * Windowing and time-domain aliasing + * DCT transform + *--------------------------------------------------------------------------*/ + Q_audio = 0; + move16(); + floatToFixed_arr( st->old_input_signal, st->old_input_signal_fx, 0, add( L_FRAME48k, add( L_FRAME48k, NS2SA( 48000, DELAY_FIR_RESAMPL_NS ) ) ) ); + st->q_inp = 0; + move16(); + Copy( st->old_input_signal_fx, two_frames_buffer, input_frame ); + Copy( audio_fx, two_frames_buffer + input_frame, input_frame ); + + wtda_fx( two_frames_buffer + input_frame, &Q_audio, wtda_audio_fx32, NULL, 0, + st->hTcxCfg->tcx_last_overlap_mode, st->hTcxCfg->tcx_curr_overlap_mode, input_frame ); + + test(); + IF( st->last_core == ACELP_CORE || EQ_16( st->last_core, AMR_WB_CORE ) ) + { + /* Preprocessing in the first HQ frame after ACELP frame */ + core_switching_hq_prepare_enc_fx( st, &num_bits, input_frame, wtda_audio_fx32, two_frames_buffer + input_frame ); + + /* During 
ACELP->HQ core switching, limit the HQ core bitrate to 48kbps */ + IF( GT_16( num_bits, ACELP_48k_BITS ) ) + { + extra_unused = sub( num_bits, ACELP_48k_BITS ); + num_bits = ACELP_48k_BITS; + move16(); + } + } + + Word16 tmp_q = Q_audio; + move16(); + direct_transform_fx( wtda_audio_fx32, t_audio_fx, is_transient, input_frame, &Q_audio, st->element_mode ); + scale_sig32( wtda_audio_fx32, L_FRAME48k_EXT, sub( Q_audio, tmp_q ) ); + + /* scale coefficients to their nominal level (8kHz) */ + IF( NE_16( input_frame, NORM_MDCT_FACTOR ) ) + { + UWord16 lsb; + tmp = mult_r( input_frame, 410 / 2 ); /* 1/8000 in Q15 */ + Word16 ener_match_fx = hq_nominal_scaling[tmp]; + FOR( i = 0; i < input_frame; i++ ) + { + /*t_audio_q[i] *= ener_match; */ + Mpy_32_16_ss( t_audio_fx[i], ener_match_fx, &t_audio_fx[i], &lsb ); + move16(); /* Q12 */ + } + } + + /* limit encoded band-width according to the command-line OR BWD limitation */ + inner_frame = inner_frame_tbl[st->bwidth]; + L_spec = l_spec_tbl[st->bwidth]; + + move16(); + move16(); + + + IF( GT_16( input_frame, inner_frame ) ) + { + IF( EQ_16( is_transient, 1 ) ) + { + FOR( i = 1; i < NUM_TIME_SWITCHING_BLOCKS; i++ ) + { + tmp = shr( inner_frame, 2 ); + Copy32( t_audio_fx + i_mult2( i, shr( input_frame, 2 ) ), t_audio_fx + i_mult2( i, tmp ), tmp ); + } + } + + set32_fx( t_audio_fx + inner_frame, 0, sub( input_frame, inner_frame ) ); + } + } + + + /* subtract signaling bits */ + num_bits = sub( num_bits, hBstr->nb_bits_tot ); + + /*-------------------------------------------------------------------------- + * High-band gain control in case of BWS + *--------------------------------------------------------------------------*/ + + IF( st->bwidth_sw_cnt > 0 ) + { + Word16 exp; + Word32 L_tmp; + tmp = BASOP_Util_Divide1616_Scale( 3, BWS_TRAN_PERIOD, &exp ); + tmp = shl( tmp, exp ); /* apply the quotient exponent -> Q15 (the shift result was previously discarded); NOTE(review): numerator is the constant 3 - confirm whether st->bwidth_sw_cnt was intended */ + L_tmp = L_deposit_h( tmp ); + IF( is_transient ) + { + FOR( i = 0; i < NUM_TIME_SWITCHING_BLOCKS; i++ ) + { + v_multc_fixed( t_audio_fx + add( i_mult2( i, shr( 
inner_frame, 2 ) ), L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS ), L_tmp, t_audio_fx + add( i_mult2( i, shr( inner_frame, 2 ) ), L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS ), sub( shr( inner_frame, 2 ), L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS ) ); // Q_audio + Q31 - Q31 -> Q_audio + } + } + ELSE + { + v_multc_fixed( t_audio_fx + L_FRAME16k, L_tmp, t_audio_fx + L_FRAME16k, L_spec - L_FRAME16k ); // Q_audio + Q31 - Q31 -> Q_audio + } + } + + /*-------------------------------------------------------------------------- + * Classify whether to put extra bits for FER mitigation + *--------------------------------------------------------------------------*/ + + test(); + test(); + test(); + IF( ( EQ_16( st->last_core, TCX_20_CORE ) || EQ_16( st->last_core, TCX_10_CORE ) || EQ_16( st->last_core, HQ_CORE ) ) && GT_32( st->core_brate, MINIMUM_RATE_TO_ENCODE_VOICING_FLAG ) ) + { + IF( Voicing_flag > 0 ) + { + push_indice( hBstr, IND_HQ_VOICING_FLAG, 1, 1 ); + num_bits = sub( num_bits, 1 ); + } + ELSE + { + push_indice( hBstr, IND_HQ_VOICING_FLAG, 0, 1 ); + num_bits = sub( num_bits, 1 ); + } + } + + /*-------------------------------------------------------------------------- + * Transform-domain encoding + *--------------------------------------------------------------------------*/ + + scale_sig32( t_audio_fx, L_FRAME48k, sub( Q12, Q_audio ) ); + scale_sig32( wtda_audio_fx32, 2 * L_FRAME48k, sub( Q12, Q_audio ) ); + Q_audio = 12; + move16(); + IF( EQ_16( hq_core_type, LOW_RATE_HQ_CORE ) ) + { + + hq_lr_enc_ivas_fx( st, t_audio_fx, inner_frame, &num_bits, is_transient ); + } + ELSE + { + /* HQ high rate encoder */ + hq_hr_enc_ivas_fx( st, t_audio_fx, L_spec, &num_bits, is_transient, vad_hover_flag ); + } + + /* write all unused bits to the bitstream */ + num_bits = add( num_bits, extra_unused ); + + WHILE( GT_16( num_bits, 16 ) ) + { + push_indice( hBstr, IND_UNUSED, 0, 16 ); + num_bits = sub( num_bits, 16 ); + } + + IF( num_bits != 0 ) + { + push_indice( hBstr, IND_UNUSED, 0, num_bits 
); + } + + test(); + test(); + IF( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || EQ_16( st->last_core, AMR_WB_CORE ) ) ) /* IVAS switching frame: same condition as the analysis branch that calls WindowSignal()/TCX_MDCT() */ + { + overlap = st->hTcxCfg->tcx_mdct_window_length; + move16(); + + nz = NS2SA( st->sr_core, N_ZERO_MDCT_NS ); + move16(); + L_frame = sub( st->L_frame + st->hTcxCfg->tcx_offset, st->hTcxCfg->lfacNext ); + tcx_offset = st->hTcxCfg->lfacNext; + move16(); + + set16_fx( Aq_old_fx, 0, M + 1 ); /* Dummy filter */ + Aq_old_fx[0] = 1; + move16(); + + /* Code taken from InternalTCXDecoder() */ + Copy_Scale_sig_32_16( wtda_audio_fx32, wtda_audio_fx16, 2 * L_FRAME48k, negate( Q_audio ) ); // Q0 + TCX_MDCT_Inverse( t_audio_fx, sub( sub( 31, Q_audio ), 15 ), wtda_audio_fx16, overlap, sub( L_frame, overlap ), overlap, st->element_mode ); + + + /* Window current frame */ + Word16 tcx_offset_tmp; + + IF( tcx_offset < 0 ) + { + tcx_offset_tmp = negate( tcx_offset ); + } + ELSE + { + tcx_offset_tmp = 0; + move16(); + } + tcx_windowing_synthesis_current_frame( wtda_audio_fx16, st->hTcxCfg->tcx_aldo_window_2, st->hTcxCfg->tcx_mdct_window_half, st->hTcxCfg->tcx_mdct_window_minimum, overlap, /*st->hTcxCfg->tcx_mdct_window_length*/ st->hTcxCfg->tcx_mdct_window_half_length, st->hTcxCfg->tcx_mdct_window_min_length, st->last_core == ACELP_CORE, st->hTcxCfg->tcx_last_overlap_mode, /*left mode*/ st->hTcxEnc->acelp_zir, st->hTcxEnc->Txnq, NULL, Aq_old_fx, st->hTcxCfg->tcx_mdct_window_trans, shr( st->L_frame, 1 ), tcx_offset_tmp, st->last_core, 0, 0 ); + + + /*Compute windowed synthesis in case of switching to ALDO windows in next frame*/ + Copy( wtda_audio_fx16 + sub( L_frame, nz ), st->hTcxEnc->old_out_fx, nz + overlap ); + set16_fx( st->hTcxEnc->old_out_fx + add( nz, overlap ), 0, nz ); + + tcx_windowing_synthesis_past_frame( st->hTcxEnc->old_out_fx + nz, st->hTcxCfg->tcx_aldo_window_1_trunc, st->hTcxCfg->tcx_mdct_window_half, st->hTcxCfg->tcx_mdct_window_minimum, overlap, st->hTcxCfg->tcx_mdct_window_half_length, st->hTcxCfg->tcx_mdct_window_min_length, 
FULL_OVERLAP ); + + FOR( i = 0; i < nz; i++ ) + { + Word16 reverse_index = sub( sub( nz, 1 ), i ); + + IF( LT_16( reverse_index, shr( nz, 1 ) ) ) + { + // Imaginary part: reverse index corresponds to imaginary part + st->hTcxEnc->old_out_fx[nz + overlap + i] = mult( wtda_audio_fx16[L_frame - 1 - i], st->hTcxCfg->tcx_aldo_window_1_trunc[-1 - i].v.im ); // Q0 + Q15 - Q15 -> Q0 + move16(); + } + ELSE + { + // Real part: reverse index - nz/2 corresponds to real part + st->hTcxEnc->old_out_fx[nz + overlap + i] = mult( wtda_audio_fx16[L_frame - 1 - i], st->hTcxCfg->tcx_aldo_window_1_trunc[-1 - ( i - shr( nz, 1 ) )].v.re ); // Q0 + Q15 - Q15 -> Q0 + move16(); + } + } + + Copy( wtda_audio_fx16 + sub( shr( overlap, 1 ), tcx_offset ), output_fx, st->L_frame ); + } + ELSE + { + Word16 tmp_q = Q_audio; + move16(); + Word32 ener_match_fx = SQRT2_FIXED; + move32(); + FOR( i = 0; i < input_frame; i++ ) + { + t_audio_fx[i] = Mpy_32_32( t_audio_fx[i], ener_match_fx ); + move32(); /* Q12 - 1 -> Q11 */ + } + + Q_audio = sub( Q_audio, 1 ); + scale_sig32( wtda_audio_fx32, 2 * L_FRAME48k, sub( Q_audio, tmp_q ) ); + Inverse_Transform( t_audio_fx, &Q_audio, wtda_audio_fx32, is_transient, L_FRAME16k, inner_frame, st->element_mode ); + + + out_q = Q_audio; + move16(); + window_ola_fx( wtda_audio_fx32, output_fx, &out_q, st->hTcxEnc->old_out_fx, &old_q, L_FRAME16k, st->hTcxCfg->tcx_last_overlap_mode, st->hTcxCfg->tcx_curr_overlap_mode, 0, 0, NULL ); + + Scale_sig( output_fx, L_FRAME16k, negate( out_q ) ); + Scale_sig( st->hTcxEnc->old_out_fx, L_FRAME32k, negate( old_q ) ); + st->hTcxEnc->Q_old_out = 0; + move16(); + } + + IF( st->element_mode > EVS_MONO ) + { + /* Store LB synthesis in case of switch to ACELP */ + Copy( output_fx, st->hLPDmem->old_exc, L_FRAME16k ); + st->hLPDmem->e_old_exc = 15; + move16(); + } + pop_wmops(); + + return; +} +#endif + +#ifndef IVAS_FLOAT_FIXED void hq_core_enc( Encoder_State *st, /* i/o: encoder state structure */ const float *audio, /* i : input audio 
signal */ @@ -71,10 +434,6 @@ void hq_core_enc( int16_t L_spec, overlap, nz, tcx_offset; float Aq_old[M + 1]; float output[L_FRAME16k]; - -#ifdef IVAS_FLOAT_FIXED - Word32 t_audio_fx[L_FRAME48k_EXT]; -#endif BSTR_ENC_HANDLE hBstr = st->hBstr; push_wmops( "hq_core_enc" ); @@ -108,23 +467,17 @@ void hq_core_enc( /*-------------------------------------------------------------------------- * Detect signal transition *--------------------------------------------------------------------------*/ - is_transient = detect_transient( st, audio, input_frame ); - if ( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE ) ) { /*-------------------------------------------------------------------------- * IVAS switching frame *--------------------------------------------------------------------------*/ - L_spec = input_frame; left_overlap = -1; right_overlap = -1; - WindowSignal_flt( st->hTcxCfg, st->hTcxCfg->tcx_offsetFB, TRANSITION_OVERLAP, FULL_OVERLAP, &left_overlap, &right_overlap, st->hTcxEnc->speech_TCX_flt, &L_spec, wtda_audio, 1, 1 ); - TCX_MDCT_flt( wtda_audio, t_audio, left_overlap, L_spec - ( left_overlap + right_overlap ) / 2, right_overlap, st->element_mode ); - inner_frame = inner_frame_tbl[st->bwidth]; L_spec = l_spec_ext_tbl[st->bwidth]; is_transient = 0; @@ -230,51 +583,11 @@ void hq_core_enc( { /* HQ low rate encoder */ -#ifdef IVAS_FLOAT_FIXED -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - floatToFixed_arr32( t_audio, t_audio_fx, Q12, L_FRAME48k_EXT ); -#endif - hq_lr_enc_ivas_fx( st, t_audio_fx, inner_frame, &num_bits, is_transient ); - -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arrL( st->hHQ_core->last_ni_gain_fx, st->hHQ_core->last_ni_gain, Q17, BANDS_MAX ); - fixedToFloat_arr( st->hHQ_core->last_env_fx, st->hHQ_core->last_env, Q1, BANDS_MAX ); - fixedToFloat_arrL( t_audio_fx, t_audio, Q12, L_FRAME48k_EXT ); -#endif -#else hq_lr_enc( st, t_audio, inner_frame, &num_bits, is_transient ); -#endif } else { -#ifdef 
IVAS_FLOAT_FIXED - -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - floatToFixed_arrL32( t_audio, t_audio_fx, Q12, L_FRAME48k_EXT ); - IF( NE_16( st->element_mode, EVS_MONO ) ) - { - Word16 temp_e; - - f2me( st->hHQ_core->crest_lp, &st->hHQ_core->crest_lp_fx, &temp_e ); - st->hHQ_core->crest_lp_q = 31 - temp_e; - f2me( st->hHQ_core->crest_mod_lp, &st->hHQ_core->crest_mod_lp_fx, &temp_e ); - st->hHQ_core->crest_mod_lp_q = 31 - temp_e; - } -#endif - /* HQ high rate encoder */ - hq_hr_enc_ivas_fx( st, t_audio_fx, L_spec, &num_bits, is_transient, vad_hover_flag ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - IF( NE_16( st->element_mode, EVS_MONO ) ) - { - - st->hHQ_core->crest_lp = fix_to_float( st->hHQ_core->crest_lp_fx, st->hHQ_core->crest_lp_q ); - st->hHQ_core->crest_mod_lp = fix_to_float( st->hHQ_core->crest_mod_lp_fx, st->hHQ_core->crest_mod_lp_q ); - } - fixedToFloat_arrL32( t_audio_fx, t_audio, Q12, L_FRAME48k ); -#endif -#else hq_hr_enc( st, t_audio, L_spec, &num_bits, is_transient, vad_hover_flag ); -#endif } /* write all unused bits to the bitstream */ @@ -338,6 +651,7 @@ void hq_core_enc( return; } +#endif /*-------------------------------------------------------------------* * hq_core_enc_init() diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index 10ce8d816..7893c4994 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -324,7 +324,170 @@ ivas_error ivas_core_enc( if ( st->core == HQ_CORE ) { /* HQ core encoder */ + +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + PWord16 *p; + Word16 q = 0; + Word16 overlap, tmp, i; + Word16 left_overlap, right_overlap; + IF( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE ) ) + { + left_overlap = st->hTcxCfg->tcx_mdct_window_lengthFB; + right_overlap = st->hTcxCfg->tcx_mdct_window_delayFB; + Word16 L_frame1 = 640; + + p = st->hTcxCfg->tcx_mdct_window_minimumFB; + tmp = shr( st->hTcxCfg->tcx_mdct_window_min_lengthFB, 1 ); + + Word16 q1 = 
Q_factor_arr( st->hTcxCfg->tcx_mdct_window_minimumFB_flt, tmp ); + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_minimumFB_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_minimumFB_flt[i + tmp], q1 ); + } + + p = st->hTcxCfg->tcx_mdct_window_transFB; + tmp = shr( st->hTcxCfg->tcx_mdct_window_min_lengthFB, 1 ); + + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_transFB_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_transFB_flt[i + tmp], q1 ); + } + + p = st->hTcxCfg->tcx_aldo_window_2_FB; + tmp = shr( st->hTcxCfg->tcx_mdct_window_delayFB, 1 ); + + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_aldo_window_2_FB_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_aldo_window_2_FB_flt[i + tmp], q1 ); + } + + tmp = st->hTcxCfg->tcx_mdct_window_min_length / 2; + p = st->hTcxCfg->tcx_mdct_window_minimum; + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_minimum_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_minimum_flt[i + tmp], q1 ); + } + + + Word16 l1 = ( shr( left_overlap, 1 ) * 2 ) + sub( L_frame1, shr( add( left_overlap, right_overlap ), 1 ) ) + ( shr( right_overlap, 1 ) * 2 ) - shr( left_overlap, 1 ) + st->hTcxCfg->tcx_offsetFB; + q = Q_factor_arr( st->hTcxEnc->speech_TCX_flt, l1 ) - 1; + st->hTcxEnc->q_speech_TCX = q; + + Word16 offset, overlap1; + offset = negate( shr( st->hTcxCfg->tcx_mdct_window_trans_lengthFB, 1 ) ); + overlap1 = st->hTcxCfg->tcx_mdct_window_trans_lengthFB; + + floatToFixed_arr( st->hTcxEnc->speech_TCX_flt - overlap1 / 2 + offset, st->hTcxEnc->speech_TCX - overlap1 / 2 + offset, q, l1 ); + floatToFixed_arr( 
st->hTcxEnc->speech_TCX_flt - overlap1 / 2 + offset, st->hTcxEnc->speech_TCX - overlap1 / 2 + offset, q, 2 * L_FRAME48k + 960 ); + + q1 = 15; + tmp = st->hTcxCfg->tcx_mdct_window_half_length / 2; + p = st->hTcxCfg->tcx_mdct_window_half; + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_half_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_half_flt[i + tmp], q1 ); + } + + tmp = st->hTcxCfg->tcx_mdct_window_min_length / 2; + p = st->hTcxCfg->tcx_mdct_window_trans; + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_mdct_window_trans_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_mdct_window_trans_flt[i + tmp], q1 ); + } + + overlap = st->hTcxCfg->tcx_mdct_window_length; + tmp = overlap / 2; + p = st->hTcxCfg->tcx_aldo_window_2; + FOR( i = 0; i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_aldo_window_2_flt[i], q1 ); + } + FOR( i = 0; i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_aldo_window_2_flt[i + tmp], q1 ); + } + + tmp = overlap / 2; + p = st->hTcxCfg->tcx_aldo_window_1_trunc; + FOR( i = -NS2SA( st->sr_core, N_ZERO_MDCT_NS ); i < tmp; i++ ) + { + p[i].v.im = float_to_fix16( st->hTcxCfg->tcx_aldo_window_1_trunc_flt[i], q1 ); + } + FOR( i = -NS2SA( st->sr_core, N_ZERO_MDCT_NS ); i < tmp; i++ ) + { + p[tmp - 1 - i].v.re = float_to_fix16( st->hTcxCfg->tcx_aldo_window_1_trunc_flt[i + tmp], q1 ); + } + + FOR( i = 0; i < L_FRAME32k / 2 + 64; i++ ) + { + st->hTcxEnc->Txnq[i] = float_to_fix16( st->hTcxEnc->Txnq_flt[i], 0 ); + } + } + ELSE + { + FOR( Word16 i = 0; i < L_FRAME32k; i++ ) + { + st->hTcxEnc->old_out_fx[i] = float_to_fix16( st->hTcxEnc->old_out[i], 0 ); + } + } + + IF( NE_16( st->element_mode, EVS_MONO ) ) + { + Word16 temp_e; + + f2me( st->hHQ_core->crest_lp, &st->hHQ_core->crest_lp_fx, &temp_e ); + 
st->hHQ_core->crest_lp_q = sub( Q31, temp_e ); + f2me( st->hHQ_core->crest_mod_lp, &st->hHQ_core->crest_mod_lp_fx, &temp_e ); + st->hHQ_core->crest_mod_lp_q = sub( Q31, temp_e ); + } + + floatToFixed_arr( st->input, st->input_fx, 0, 960 ); +#endif + + hq_core_enc_ivas_fx( st, st->input_fx, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] ); + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + IF( NE_16( st->element_mode, EVS_MONO ) ) + { + st->hHQ_core->crest_lp = fix_to_float( st->hHQ_core->crest_lp_fx, st->hHQ_core->crest_lp_q ); + st->hHQ_core->crest_mod_lp = fix_to_float( st->hHQ_core->crest_mod_lp_fx, st->hHQ_core->crest_mod_lp_q ); + } + IF( st->element_mode > EVS_MONO ) + { + FOR( i = 0; i < L_FRAME32k; i++ ) + { + st->hLPDmem->old_exc_flt[i] = fix16_to_float( st->hLPDmem->old_exc[i], 0 ); + } + } +#endif +#else hq_core_enc( st, st->input, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] ); +#endif } /*---------------------------------------------------------------------* diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 59710e651..57c977cb8 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -3313,15 +3313,6 @@ void hq_lr_enc_fx( const Word16 is_transient_fx /* i : Q0 : transient flag */ ); -void hq_hr_enc_ivas_fx( - Encoder_State *st, /* i/o: encoder state structure */ - Word32 *t_audio, /* i/o: transform-domain coefficients Q12*/ - const Word16 length, /* i : length of spectrum */ - Word16 *num_bits, /* i/o: number of available bits */ - const Word16 is_transient, /* i : transient flag */ - const Word16 vad_hover_flag /* i : VAD hangover flag */ -); - void hq_lr_enc_ivas_fx( Encoder_State *st_fx, /* i/o: : encoder state structure */ Word32 L_t_audio[], /* i/o: Q12 : transform-domain coefs. 
*/ diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index 74d248f1c..6f42de541 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -1638,6 +1638,7 @@ typedef struct tcx_enc_structure float *new_speech_TCX_flt; Word16 *speech_TCX; Word16 *new_speech_TCX; + Word16 q_speech_TCX; int16_t tcxltp; int16_t tcxltp_pitch_int; -- GitLab From 04de10ed22078edb145d7ce0b5ede2edf605ad57 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Sat, 21 Sep 2024 12:58:25 +0530 Subject: [PATCH 2/2] Clang build and warning fix --- lib_enc/fd_cng_enc.c | 6 +++--- lib_enc/hq_core_enc.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib_enc/fd_cng_enc.c b/lib_enc/fd_cng_enc.c index 514a5ffbd..bfe987593 100644 --- a/lib_enc/fd_cng_enc.c +++ b/lib_enc/fd_cng_enc.c @@ -1372,7 +1372,7 @@ void FdCngEncodeMDCTStereoSID( { #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - + Word16 ms_ptr_e; Word16 weights_fx[24]; invTrfMatrix_fx = (Word32 *) tmpRAM_fx; @@ -1463,7 +1463,7 @@ void FdCngEncodeMDCTStereoSID( floatToFixed_arrL( lr_out_ptr[ch], lr_out_ptr_fx[ch], q, N ); #endif /* scale bands and get scalefactors */ - scalebands( lr_out_ptr_fx[ch], hFdCngEnc->partDec, N, hFdCngEnc->midbandDec, hFdCngEnc->nFFTpartDec,sub( hFdCngEnc->stopBandDec , hFdCngEnc->startBandDec), hFdCngCom->cngNoiseLevel, 1 ); + scalebands( lr_out_ptr_fx[ch], hFdCngEnc->partDec, N, hFdCngEnc->midbandDec, hFdCngEnc->nFFTpartDec, sub( hFdCngEnc->stopBandDec, hFdCngEnc->startBandDec ), hFdCngCom->cngNoiseLevel, 1 ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arrL( hFdCngCom->cngNoiseLevel, hFdCngCom->cngNoiseLevel_flt, q, FFTCLDFBLEN ); #endif @@ -1702,7 +1702,7 @@ void FdCngEncodeDiracMDCTStereoSID( floatToFixed_arrL( lr_out_ptr[ch], lr_out_ptr_fx[ch], q, N[ch] ); #endif /* scale bands and get scalefactors */ - scalebands( lr_out_ptr_fx[ch], hFdCngEnc->partDec, N[ch], hFdCngEnc->midbandDec, hFdCngEnc->nFFTpartDec, sub(hFdCngEnc->stopBandDec , hFdCngEnc->startBandDec), 
hFdCngCom->cngNoiseLevel, 1 ); + scalebands( lr_out_ptr_fx[ch], hFdCngEnc->partDec, N[ch], hFdCngEnc->midbandDec, hFdCngEnc->nFFTpartDec, sub( hFdCngEnc->stopBandDec, hFdCngEnc->startBandDec ), hFdCngCom->cngNoiseLevel, 1 ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS fixedToFloat_arrL( hFdCngCom->cngNoiseLevel, hFdCngCom->cngNoiseLevel_flt, q, FFTCLDFBLEN ); #endif diff --git a/lib_enc/hq_core_enc.c b/lib_enc/hq_core_enc.c index e805cf4e0..277fdaaad 100644 --- a/lib_enc/hq_core_enc.c +++ b/lib_enc/hq_core_enc.c @@ -314,7 +314,7 @@ void hq_core_enc_ivas_fx( test(); test(); - IF( st->element_mode > EVS_MONO && ( st->last_core, ACELP_CORE || EQ_16( st->last_core, AMR_WB_CORE ) ) ) + IF( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || EQ_16( st->last_core, AMR_WB_CORE ) ) ) { overlap = st->hTcxCfg->tcx_mdct_window_length; move16(); -- GitLab