From ff64feadad387e41950254f37781ae124538ef50 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 19 Jul 2024 14:42:27 +0530 Subject: [PATCH 1/2] stereo_dft_enc_synthesize_fx implementation --- lib_com/fft_fx.c | 18 + lib_com/ivas_prot_fx.h | 14 + lib_com/prot_fx.h | 12 + lib_enc/ivas_core_enc.c | 410 ++++++++++++++++++- lib_enc/ivas_cpe_enc.c | 748 +++++++++++++++++++++++++++++++++- lib_enc/ivas_stat_enc.h | 21 +- lib_enc/ivas_stereo_dft_enc.c | 478 +++++++++++++++++++++- lib_enc/swb_pre_proc.c | 548 +++++++++++++++++++++++++ 8 files changed, 2230 insertions(+), 19 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index d7a1aa12f..024221ca6 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -7111,6 +7111,12 @@ void rfft_fx( s2 = -204; move16(); BREAK; + case 256: + s1 = 128; + move16(); + s2 = -128; + move16(); + BREAK; case 320: s1 = 102; move16(); @@ -7123,6 +7129,18 @@ void rfft_fx( s2 = -68; move16(); BREAK; + case 640: + s1 = 51; + move16(); + s2 = -51; + move16(); + BREAK; + case 960: + s1 = 34; + move16(); + s2 = -34; + move16(); + BREAK; default: s1 = -1; move16(); diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 39e8de6a0..c24ad3237 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -1123,6 +1123,20 @@ void stereo_dft_dec_fx( const Word16 num_md_sub_frames /* i : number of MD subframes */ ); +// ivas_stereo_dft_enc.c +#ifdef IVAS_FLOAT_FIXED +Word32 stereo_dft_enc_synthesize_fx( + STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle */ + // float *output, /* o : output synthesis */ + Word32 *output_fx, /* o : output synthesis Q16 */ + Word16 *output_start_index, + Word16 *output_end_index, + const Word16 chan, /* i : channel number */ + const Word32 input_Fs, /* i : input sampling rate */ + const Word32 output_sampling_rate, /* i : output sampling rate */ + const Word16 L_frame, /* i : frame length at internal Fs */ + Word16 *nrg_out_fx_e ); +#endif void 
ivas_ls_setup_conversion_fx( Decoder_Struct *st_ivas, /* i : IVAS decoder structure */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 524cf7b73..b3e016846 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -63,6 +63,7 @@ #include "ivas_cnst.h" #include "stat_enc.h" #include "stat_dec.h" +#include "ivas_stat_enc.h" #include "ivas_stat_dec.h" #include "ivas_error.h" #include "ivas_error_utils.h" @@ -10068,4 +10069,15 @@ Word16 sr2fscale( const Word32 sr_core /* i : internal sampling rate */ ); +// pre_proc functions +/*full implementation pending*/ +void swb_pre_proc_ivas_fx( + Encoder_State *st, /* i/o: encoder state structure */ + float *new_swb_speech, /* o : original input signal at 32kHz */ + Word32 *new_swb_speech_fx, /* o : original input signal at 32kHz */ + float *shb_speech, /* o : SHB target signal (6-14kHz) at 16kHz */ + float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : real buffer */ + float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : imag buffer */ + CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ +); #endif diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index ebc6bd649..0d39208e7 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -40,6 +40,7 @@ #include "wmc_auto.h" #include #ifdef IVAS_FLOAT_FIXED +#include "prot_fx.h" #include "ivas_prot_fx.h" #endif @@ -49,7 +50,7 @@ * * Principal IVAS core coder routine, where number of core channels is 1 or 2 *-------------------------------------------------------------------*/ - +#ifdef IVAS_FLOAT_FIXED ivas_error ivas_core_enc( SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ @@ -90,6 +91,10 @@ ivas_error ivas_core_enc( float hb_speech[L_FRAME16k / 4]; float *new_swb_speech; float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX]; +#ifdef IVAS_FLOAT_FIXED + Word32 *new_swb_speech_fx; + Word32 new_swb_speech_buffer_fx[L_FRAME48k + STEREO_DFT_OVL_MAX]; +#endif 
float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET]; float voice_factors[CPE_CHANNELS][NB_SUBFR16k]; int16_t Voicing_flag[CPE_CHANNELS]; @@ -321,6 +326,408 @@ ivas_error ivas_core_enc( } + /*---------------------------------------------------------------------* + * Postprocessing, BWEs and Updates + *---------------------------------------------------------------------*/ + + for ( n = 0; n < n_CoreChannels; n++ ) + { + st = sts[n]; + + /*---------------------------------------------------------------------* + * Postprocessing for ACELP/HQ core switching + *---------------------------------------------------------------------*/ + + core_switching_post_enc( st, old_inp_12k8[n], old_inp_16k[n], A[n] ); + + /*---------------------------------------------------------------------* + * WB TBE encoding + * WB BWE encoding + *---------------------------------------------------------------------*/ + + if ( input_Fs >= 16000 && st->bwidth < SWB && st->hBWE_TD != NULL ) + { + /* Common pre-processing for WB TBE and WB BWE */ + wb_pre_proc( st, last_element_mode, new_inp_resamp16k[n], hb_speech ); + } + + if ( st->extl == WB_TBE ) + { + /* WB TBE encoder */ + wb_tbe_enc( st, hb_speech, bwe_exc_extended[n], voice_factors[n], pitch_buf[n] ); + } + else if ( st->extl == WB_BWE && n == 0 && st->element_mode != IVAS_CPE_MDCT ) + { + /* WB BWE encoder */ + wb_bwe_enc( st, new_inp_resamp16k[n] ); + } + + /*---------------------------------------------------------------------* + * SWB(FB) TBE encoding + * SWB(FB) BWE encoding + *---------------------------------------------------------------------*/ + + new_swb_speech = new_swb_speech_buffer + STEREO_DFT_OVL_MAX; +#ifdef IVAS_FLOAT_FIXED + new_swb_speech_fx = new_swb_speech_buffer_fx + STEREO_DFT_OVL_MAX; +#endif + + if ( !st->Opt_SC_VBR && input_Fs >= 32000 && st->hBWE_TD != NULL ) + { + /* Common pre-processing for SWB(FB) TBE and SWB(FB) BWE */ +#ifdef IVAS_FLOAT_FIXED + swb_pre_proc_ivas_fx( st, new_swb_speech, 
new_swb_speech_fx, shb_speech, realBuffer[n], imagBuffer[n], hCPE ); +#else + swb_pre_proc( st, new_swb_speech, shb_speech, realBuffer[n], imagBuffer[n], hCPE ); +#endif + } + else if ( input_Fs >= 32000 ) + { + if ( st->hBWE_TD != NULL ) + { + InitSWBencBufferStates( st->hBWE_TD, shb_speech ); + } + } + + /* SWB TBE encoder */ + if ( st->extl == SWB_TBE || st->extl == FB_TBE ) + { + if ( st->core_brate != FRAME_NO_DATA && st->core_brate != SID_2k40 ) + { + float fb_exc[L_FRAME16k]; + + swb_tbe_enc( st, hStereoICBWE, shb_speech, bwe_exc_extended[n], voice_factors[n], fb_exc, pitch_buf[n] ); + + if ( st->extl == FB_TBE ) + { + /* FB TBE encoder */ + fb_tbe_enc( st, st->input, fb_exc ); + } + } + } + else if ( st->extl == SWB_BWE || st->extl == FB_BWE ) + { + /* SWB(FB) BWE encoder */ + swb_bwe_enc( st, last_element_mode, old_inp_12k8[n], old_inp_16k[n], old_syn_12k8_16k[n], new_swb_speech, shb_speech ); + } + + /*---------------------------------------------------------------------* + * SWB DTX/CNG encoding + *---------------------------------------------------------------------*/ + + if ( st->hTdCngEnc != NULL && st->Opt_DTX_ON && ( input_frame >= L_FRAME32k || st->element_mode == IVAS_CPE_DFT ) ) + { + /* SHB DTX/CNG encoder */ + swb_CNG_enc( st, shb_speech, old_syn_12k8_16k[n] ); + } + + /*-------------------------------------------------------------------* + * Inter-channel BWE encoding + *-------------------------------------------------------------------*/ + + if ( n == 0 && input_Fs >= 32000 && hStereoICBWE != NULL ) + { + stereo_icBWE_preproc( hCPE, input_frame, new_swb_speech_buffer /*tmp buffer*/ ); + + stereo_icBWE_enc( hCPE, shb_speech, new_swb_speech_buffer, voice_factors[0] ); + } + + /*---------------------------------------------------------------------* + * Channel-aware mode - write signaling information into the bitstream + *---------------------------------------------------------------------*/ + + signaling_enc_rf( st ); + + 
/*---------------------------------------------------------------------* + * Common updates + *---------------------------------------------------------------------*/ + + if ( !MCT_flag ) /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */ + { + updt_enc_common( st ); + } + } + + /*------------------------------------------------------------------* + * Write potentially unused bits in combined format coding + *-----------------------------------------------------------------*/ + + if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 ) + { + while ( diff_nBits > 0 ) + { + n = min( diff_nBits, 16 ); + push_indice( sts[0]->hBstr, IND_UNUSED, 0, n ); + diff_nBits -= n; + } + } + + + pop_wmops(); + + return error; +} +#else +ivas_error ivas_core_enc( + SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */ + CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ + MCT_ENC_HANDLE hMCT, /* i/o: MCT encoder structure */ + const int16_t n_CoreChannels, /* i : number of core channels to be coded */ + float old_inp_12k8[][L_INP_12k8], /* i : buffer of old input signal */ + float old_inp_16k[][L_INP], /* i : buffer of old input signal */ + float ener[], /* i : residual energy from Levinson-Durbin */ + float A[][NB_SUBFR16k * ( M + 1 )], /* i : A(z) unquantized for the 4 subframes */ + float Aw[][NB_SUBFR16k * ( M + 1 )], /* i : weighted A(z) unquantized for subframes */ + float epsP[][M + 1], /* i : LP prediction errors */ + float lsp_new[][M], /* i : LSPs at the end of the frame */ + float lsp_mid[][M], /* i : LSPs in the middle of the frame */ + const int16_t vad_hover_flag[], /* i : VAD hanglover flag */ + int16_t attack_flag[], /* i : attack flag (GSC or TC) */ + float realBuffer[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer */ + float imagBuffer[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer */ + float 
old_wsp[][L_WSP], /* i : weighted input signal buffer */ + const int16_t loc_harm[], /* i : harmonicity flag */ + const float cor_map_sum[], /* i : speech/music clasif. parameter */ + const int16_t vad_flag_dtx[], /* i : HE-SAD flag with additional DTX HO */ + float enerBuffer[][CLDFB_NO_CHANNELS_MAX], /* i : energy buffer */ + float fft_buff[][2 * L_FFT], /* i : FFT buffer */ + const int16_t tdm_SM_or_LRTD_Pri, /* i : channel combination scheme flag */ + const int16_t ivas_format, /* i : IVAS format */ + const int16_t flag_16k_smc /* i : flag to indicate if the OL SMC is run at 16 kHz */ +) +{ + int16_t n, input_frame; + int16_t cpe_id, MCT_flag; + Encoder_State **sts, *st; + STEREO_ICBWE_ENC_HANDLE hStereoICBWE; + STEREO_TD_ENC_DATA_HANDLE hStereoTD; + float *inp[CPE_CHANNELS]; + float new_inp_resamp16k[CPE_CHANNELS][L_FRAME16k]; /* new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */ + float old_syn_12k8_16k[CPE_CHANNELS][L_FRAME16k]; /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE */ + float shb_speech[L_FRAME16k]; + float hb_speech[L_FRAME16k / 4]; + float *new_swb_speech; + float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX]; + float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET]; + float voice_factors[CPE_CHANNELS][NB_SUBFR16k]; + int16_t Voicing_flag[CPE_CHANNELS]; + float pitch_buf[CPE_CHANNELS][NB_SUBFR16k]; + int16_t unbits[CPE_CHANNELS]; + float tdm_lspQ_PCh[M], tdm_lsfQ_PCh[M]; + int16_t last_element_mode, tdm_Pitch_reuse_flag; + int32_t element_brate, last_element_brate, input_Fs; + int16_t diff_nBits; + ivas_error error; + int16_t max_num_indices_BWE; + + push_wmops( "ivas_core_enc" ); + + error = IVAS_ERR_OK; + + /*------------------------------------------------------------------* + * General initialization + *-----------------------------------------------------------------*/ + + if ( hSCE != NULL ) + { + cpe_id = -1; + MCT_flag = 0; + sts = hSCE->hCoreCoder; + hStereoTD = NULL; + 
hStereoICBWE = NULL; + element_brate = hSCE->element_brate; + last_element_brate = hSCE->last_element_brate; + last_element_mode = IVAS_SCE; + tdm_Pitch_reuse_flag = -1; + } + else + { + cpe_id = hCPE->cpe_id; + MCT_flag = 0; + if ( hMCT != NULL ) + { + MCT_flag = 1; + } + sts = hCPE->hCoreCoder; + hStereoICBWE = hCPE->hStereoICBWE; + element_brate = hCPE->element_brate; + last_element_brate = hCPE->last_element_brate; + last_element_mode = hCPE->last_element_mode; + + if ( hCPE->hStereoTD != NULL ) + { + hStereoTD = hCPE->hStereoTD; + tdm_Pitch_reuse_flag = hCPE->hStereoTD->tdm_Pitch_reuse_flag; + } + else + { + hStereoTD = NULL; + tdm_Pitch_reuse_flag = -1; + } + } + + input_Fs = sts[0]->input_Fs; + input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC ); + + set_f( new_swb_speech_buffer, 0, L_FRAME48k + STEREO_DFT_OVL_MAX ); + + for ( n = 0; n < n_CoreChannels; n++ ) + { + st = sts[n]; + + /*------------------------------------------------------------------* + * Initializiation per core-coder channel + *-----------------------------------------------------------------*/ + + + st->extl = -1; + unbits[n] = 0; + + st->element_brate = element_brate; + + /*---------------------------------------------------------------------* + * Pre-processing, incl. Decision matrix + *---------------------------------------------------------------------*/ + + if ( ( error = pre_proc_ivas( st, last_element_mode, element_brate, ivas_format == SBA_FORMAT ? 
last_element_brate : element_brate, input_frame, old_inp_12k8[n], old_inp_16k[n], &inp[n], &ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], new_inp_resamp16k[n], &Voicing_flag[n], old_wsp[n], loc_harm[n], cor_map_sum[n], vad_flag_dtx[n], enerBuffer[n], fft_buff[n], MCT_flag, vad_hover_flag[n], flag_16k_smc ) ) != IVAS_ERR_OK ) + { + return error; + } + + if ( st->element_mode == IVAS_CPE_MDCT || st->element_mode == IVAS_SCE ) + { + st->enablePlcWaveadjust = 0; + } + } + + /*------------------------------------------------------------------* + * Sanity check in combined format coding + *-----------------------------------------------------------------*/ + + diff_nBits = 0; + if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 ) + { + ivas_combined_format_brate_sanity( hCPE->element_brate, sts[0]->core, sts[0]->total_brate, &( sts[0]->core_brate ), &( sts[0]->inactive_coder_type_flag ), &diff_nBits ); + } + + /*---------------------------------------------------------------------* + * Core Encoding + *---------------------------------------------------------------------*/ + + for ( n = 0; n < n_CoreChannels; n++ ) + { + st = sts[n]; + + /* update pointer to the buffer of indices of the second channel */ + if ( n == 1 && st->element_mode == IVAS_CPE_TD ) + { + /* adjust the pointer to the buffer of indices of the secondary channel (make space for BWE indices) */ + max_num_indices_BWE = get_BWE_max_num_indices( sts[0]->extl_brate ); + st->hBstr->ind_list = sts[0]->hBstr->ind_list + sts[0]->hBstr->nb_ind_tot + max_num_indices_BWE; + + /* write TD stereo spatial parameters */ + move_indices( hStereoTD->tdm_hBstr_tmp.ind_list, st->hBstr->ind_list, hStereoTD->tdm_hBstr_tmp.nb_ind_tot ); + st->hBstr->nb_ind_tot += hStereoTD->tdm_hBstr_tmp.nb_ind_tot; + st->hBstr->nb_bits_tot += hStereoTD->tdm_hBstr_tmp.nb_bits_tot; + + reset_indices_enc( &hStereoTD->tdm_hBstr_tmp, MAX_IND_TDM_TMP ); + } + + 
/*---------------------------------------------------------------------* + * Write signaling info into the bitstream + *---------------------------------------------------------------------*/ + + if ( !MCT_flag || ( MCT_flag && cpe_id == 0 ) ) + { + ivas_signaling_enc( st, MCT_flag, element_brate, tdm_SM_or_LRTD_Pri, tdm_Pitch_reuse_flag ); + } + + /*---------------------------------------------------------------------* + * Preprocessing (preparing) for ACELP/HQ core switching + *---------------------------------------------------------------------*/ + + core_switching_pre_enc( st, old_inp_12k8[n], old_inp_16k[n], sts[0]->active_cnt, last_element_mode ); + + /*---------------------------------------------------------------------* + * ACELP core encoding + * TCX core encoding + * HQ core encoding + *---------------------------------------------------------------------*/ + + if ( st->core == ACELP_CORE ) + { + /* ACELP core encoder */ + if ( ( error = acelp_core_enc( st, inp[n], ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], vad_hover_flag[0], attack_flag[n], bwe_exc_extended[n], voice_factors[n], old_syn_12k8_16k[n], pitch_buf[n], &unbits[n], hStereoTD, tdm_lsfQ_PCh ) ) != IVAS_ERR_OK ) + { + return error; + } + } + + if ( ( st->core == TCX_20_CORE || st->core == TCX_10_CORE ) && st->element_mode != IVAS_CPE_MDCT ) + { + /* TCX core encoder */ + stereo_tcx_core_enc( st, old_inp_12k8[n] + L_INP_MEM, old_inp_16k[n] + L_INP_MEM, Aw[n], lsp_new[n], lsp_mid[n], pitch_buf[n], last_element_mode, vad_hover_flag[0] ); + } + + if ( st->core == HQ_CORE ) + { + /* HQ core encoder */ + hq_core_enc( st, st->input, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] ); + } + + /*---------------------------------------------------------------------* + * TD stereo updates + *---------------------------------------------------------------------*/ + + if ( st->element_mode == IVAS_CPE_TD && n == 0 ) + { + td_stereo_param_updt( st->lsp_old, st->lsf_old, 
pitch_buf[0], tdm_lspQ_PCh, tdm_lsfQ_PCh, hStereoTD->tdm_Pri_pitch_buf, st->flag_ACELP16k, hStereoTD->tdm_use_IAWB_Ave_lpc ); + } + } + + + /*---------------------------------------------------------------------* + * MDCT stereo: joint TCX Core Encoding + *---------------------------------------------------------------------*/ + + if ( sts[0]->element_mode == IVAS_CPE_MDCT ) + { + if ( sts[0]->core_brate > SID_2k40 && sts[1]->core_brate > SID_2k40 ) + { + if ( MCT_flag ) + { + ivas_mdct_core_whitening_enc( hCPE, old_inp_16k, old_wsp, pitch_buf, hMCT->p_mdst_spectrum_long[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long[cpe_id], + hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE ); + } + else + { + stereo_mdct_core_enc( hCPE, old_inp_16k, old_wsp, pitch_buf ); + } + } + else if ( sts[0]->core_brate == SID_2k40 && sts[1]->core_brate == SID_2k40 ) + { + /* synch CNG configs between channels */ + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + st = sts[n]; + if ( ( st->hFdCngEnc->hFdCngCom->frameSize != st->L_frame ) || ( st->hFdCngEnc->hFdCngCom->CngBandwidth != st->bwidth ) ) + { + configureFdCngEnc( st->hFdCngEnc, max( st->bwidth, WB ), st->L_frame == L_FRAME16k ? 
ACELP_16k40 : ACELP_9k60 ); + } + } + + if ( sts[0]->cng_sba_flag ) + { + FdCngEncodeDiracMDCTStereoSID( hCPE ); + } + else + { + FdCngEncodeMDCTStereoSID( hCPE ); + } + } + } + + /*---------------------------------------------------------------------* * Postprocessing, BWEs and Updates *---------------------------------------------------------------------*/ @@ -455,3 +862,4 @@ ivas_error ivas_core_enc( return error; } +#endif \ No newline at end of file diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index da80d795c..9b8b4bfc5 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -59,7 +59,7 @@ static void stereo_mode_combined_format_enc( const Encoder_Struct *st_ivas, CPE_ * * Channel Pair Element (CPE) encoding routine *-------------------------------------------------------------------*/ - +#ifdef IVAS_FLOAT_FIXED ivas_error ivas_cpe_enc( Encoder_Struct *st_ivas, /* i/o: IVAS encoder structure */ const int16_t cpe_id, /* i : CPE # identifier */ @@ -72,8 +72,14 @@ ivas_error ivas_cpe_enc( CPE_ENC_HANDLE hCPE; Encoder_State **sts; int16_t n, n_CoreChannels; - float old_inp_12k8[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8 */ - float old_inp_16k[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz */ + float old_inp_12k8[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8 */ +#ifdef IVAS_FLOAT_FIXED + Word32 old_inp_12k8_fx[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8 */ +#endif + float old_inp_16k[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz */ +#ifdef IVAS_FLOAT_FIXED + Word32 old_inp_16k_fx[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz */ +#endif float ener[CPE_CHANNELS]; /* residual energy from Levinson-Durbin */ float relE[CPE_CHANNELS]; /* frame relative energy */ float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ @@ -455,6 +461,83 @@ ivas_error ivas_cpe_enc( internal_Fs = getTcxonly_ivas( IVAS_CPE_MDCT, sts[0]->bits_frame_nominal * 
FRAMES_PER_SEC, 0, sts[0]->is_ism_format ) == 0 ? INT_FS_16k : max( INT_FS_16k, sts[0]->sr_core ); /* iDFT at input sampling rate */ +#ifdef IVAS_FLOAT_FIXED + /*flt2fix*/ + f2me_buf( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], &hCPE->hStereoDft->DFT_fx_e[0], STEREO_DFT_N_MAX_ENC ); + f2me_buf( hCPE->hStereoDft->DFT[1], hCPE->hStereoDft->DFT_fx[1], &hCPE->hStereoDft->DFT_fx_e[1], STEREO_DFT_N_MAX_ENC ); + if ( hCPE->element_mode == IVAS_CPE_DFT && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] ) + { + floatToFixed_arr( sts[1]->old_inp_12k8, sts[1]->old_inp_12k8_fx, 0, L_INP_MEM ); + } + f2me( hCPE->hStereoDft->icbweRefEner, &hCPE->hStereoDft->icbweRefEner_fx, &hCPE->hStereoDft->icbweRefEner_fx_e ); + f2me( hCPE->hStereoDft->lbEner, &hCPE->hStereoDft->lbEner_fx, &hCPE->hStereoDft->lbEner_fx_e ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_res_8k, hCPE->hStereoDft->output_mem_res_8k_fx, 16, STEREO_DFT_OVL_8k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx, hCPE->hStereoDft->output_mem_dmx_fx, 16, STEREO_DFT_OVL_MAX ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_12k8, hCPE->hStereoDft->output_mem_dmx_12k8_fx, 16, STEREO_DFT_OVL_12k8 ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k, hCPE->hStereoDft->output_mem_dmx_16k_fx, 16, STEREO_DFT_OVL_16k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb, hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, 16, STEREO_DFT_OVL_16k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_32k, hCPE->hStereoDft->output_mem_dmx_32k_fx, 16, STEREO_DFT_OVL_32k ); + /*flt2fix end*/ + + // stereo_dft_enc_synthesize( hCPE->hStereoDft, sts[0]->input, 0, input_Fs, input_Fs, 0 ); + Word16 out_start_ind, out_end_ind; + Word16 out_12k8_start_ind[CPE_CHANNELS], out_12k8_end_ind[CPE_CHANNELS]; + Word16 out_16k_start_ind = 0, out_16k_end_ind = 0; + move16(); + move16(); + stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, sts[0]->input32_fx, &out_start_ind, &out_end_ind, 0, input_Fs, 
input_Fs, 0, NULL ); + + /* iDFT & resampling to 12.8kHz internal sampling rate */ + // stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_12k8[0] + L_INP_MEM, 0, input_Fs, INT_FS_12k8, 0 ); + stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, old_inp_12k8_fx[0] + L_INP_MEM, &out_12k8_start_ind[0], &out_12k8_end_ind[0], 0, input_Fs, INT_FS_12k8, 0, NULL ); + + /* iDFT & resampling to 16kHz internal sampling rate for M channel */ + IF( EQ_32( input_Fs, internal_Fs ) ) + { + // mvr2r( sts[0]->input - STEREO_DFT_OVL_16k, old_inp_16k[0] + L_INP_MEM - STEREO_DFT_OVL_16k, input_frame + STEREO_DFT_OVL_16k ); + Copy32( sts[0]->input32_fx - STEREO_DFT_OVL_16k, old_inp_16k_fx[0] + L_INP_MEM - STEREO_DFT_OVL_16k, input_frame + STEREO_DFT_OVL_16k ); + out_16k_start_ind = -STEREO_DFT_OVL_16k; + out_16k_end_ind = out_16k_start_ind + input_frame + STEREO_DFT_OVL_16k; + } + ELSE + { + // stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_16k[0] + L_INP_MEM, 0, input_Fs, internal_Fs, 0 ); + stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, old_inp_16k_fx[0] + L_INP_MEM, &out_16k_start_ind, &out_16k_end_ind, 0, input_Fs, internal_Fs, 0, NULL ); + } + + /* DFT Stereo: iDFT of residual signal at 8kHz sampling rate */ + test(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] ) + { + // mvr2r( sts[1]->old_inp_12k8, old_inp_12k8[1], L_INP_MEM ); + Copy_Scale_sig_16_32_no_sat( sts[1]->old_inp_12k8_fx, old_inp_12k8_fx[1], L_INP_MEM, 16 - 0 ); + // stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_12k8[1] + L_INP_MEM, 1, input_Fs, 8000, 0 ); + stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, old_inp_12k8_fx[1] + L_INP_MEM, &out_12k8_start_ind[1], &out_12k8_end_ind[1], 1, input_Fs, 8000, 0, NULL ); + + /* update old input signal buffer */ + // mvr2r( old_inp_12k8[1] + L_FRAME8k, sts[1]->old_inp_12k8, L_INP_MEM ); + Copy_Scale_sig_32_16( old_inp_12k8_fx[1] + L_FRAME8k, sts[1]->old_inp_12k8_fx, L_INP_MEM, 0 - 16 ); + } + + /*fix2flt*/ + 
hCPE->hStereoDft->icbweRefEner = me2f( hCPE->hStereoDft->icbweRefEner_fx, hCPE->hStereoDft->icbweRefEner_fx_e ); + hCPE->hStereoDft->lbEner = me2f( hCPE->hStereoDft->lbEner_fx, hCPE->hStereoDft->lbEner_fx_e ); + fixedToFloat_arrL( sts[0]->input32_fx + out_start_ind, sts[0]->input + out_start_ind, 16, out_end_ind - out_start_ind ); + fixedToFloat_arrL( old_inp_12k8_fx[0] + L_INP_MEM + out_12k8_start_ind[0], old_inp_12k8[0] + L_INP_MEM + out_12k8_start_ind[0], 16, out_12k8_end_ind[0] - out_12k8_start_ind[0] ); + fixedToFloat_arrL( old_inp_16k_fx[0] + L_INP_MEM + out_16k_start_ind, old_inp_16k[0] + L_INP_MEM + out_16k_start_ind, 16, out_16k_end_ind - out_16k_start_ind ); + if ( hCPE->element_mode == IVAS_CPE_DFT && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] ) + { + fixedToFloat_arr( sts[1]->old_inp_12k8_fx, sts[1]->old_inp_12k8, 0, L_INP_MEM ); + fixedToFloat_arrL( old_inp_12k8_fx[1] + L_INP_MEM + out_12k8_start_ind[1], old_inp_12k8[1] + L_INP_MEM + out_12k8_start_ind[1], 16, out_12k8_end_ind[1] - out_12k8_start_ind[1] ); + } + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_res_8k_fx, hCPE->hStereoDft->output_mem_res_8k, 16, STEREO_DFT_OVL_8k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_fx, hCPE->hStereoDft->output_mem_dmx, 16, STEREO_DFT_OVL_MAX ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_12k8_fx, hCPE->hStereoDft->output_mem_dmx_12k8, 16, STEREO_DFT_OVL_12k8 ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_fx, hCPE->hStereoDft->output_mem_dmx_16k, 16, STEREO_DFT_OVL_16k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, hCPE->hStereoDft->output_mem_dmx_16k_shb, 16, STEREO_DFT_OVL_16k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_32k_fx, hCPE->hStereoDft->output_mem_dmx_32k, 16, STEREO_DFT_OVL_32k ); + /*fix2flt end*/ +#else stereo_dft_enc_synthesize( hCPE->hStereoDft, sts[0]->input, 0, input_Fs, input_Fs, 0 ); /* iDFT & resampling to 12.8kHz internal sampling rate */ @@ -479,7 +562,7 @@ 
ivas_error ivas_cpe_enc( /* update old input signal buffer */ mvr2r( old_inp_12k8[1] + L_FRAME8k, sts[1]->old_inp_12k8, L_INP_MEM ); } - +#endif /* no iDFT at input sampling rate for Side channel -> reset the buffer */ set_zero( sts[1]->input, input_frame ); } @@ -783,7 +866,664 @@ ivas_error ivas_cpe_enc( pop_wmops(); return error; } +#else +ivas_error ivas_cpe_enc( + Encoder_Struct *st_ivas, /* i/o: IVAS encoder structure */ + const int16_t cpe_id, /* i : CPE # identifier */ + float data_f_ch0[], /* i : input signal for channel 0 */ + float data_f_ch1[], /* i : input signal for channel 1 */ + const int16_t input_frame, /* i : input frame length per channel */ + const int16_t nb_bits_metadata /* i : number of metadata bits */ +) +{ + CPE_ENC_HANDLE hCPE; + Encoder_State **sts; + int16_t n, n_CoreChannels; + float old_inp_12k8[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8 */ + float old_inp_16k[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz */ + float ener[CPE_CHANNELS]; /* residual energy from Levinson-Durbin */ + float relE[CPE_CHANNELS]; /* frame relative energy */ + float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )]; /* A(z) unquantized for subframes */ + float Aw[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )]; /* weighted A(z) unquantized for subframes */ + float epsP[CPE_CHANNELS][M + 1]; /* LP prediction errors */ + float lsp_new[CPE_CHANNELS][M]; /* LSPs at the end of the frame */ + float lsp_mid[CPE_CHANNELS][M]; /* ISPs in the middle of the frame */ + int16_t vad_hover_flag[CPE_CHANNELS]; /* VAD hangover flag */ + int16_t attack_flag[CPE_CHANNELS]; /* attack flag (GSC or TC) */ + float realBuffer[CPE_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; /* real buffer */ + float imagBuffer[CPE_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; /* imag buffer */ + float old_wsp[CPE_CHANNELS][L_WSP]; /* old weighted input signal */ + float pitch_fr[CPE_CHANNELS][NB_SUBFR]; /* fractional pitch values */ + float voicing_fr[CPE_CHANNELS][NB_SUBFR]; 
/* fractional pitch gains */ + int16_t loc_harm[CPE_CHANNELS]; /* harmonicity flag */ + float cor_map_sum[CPE_CHANNELS]; /* speech/music clasif. parameter */ + int16_t vad_flag_dtx[CPE_CHANNELS]; /* HE-SAD flag with additional DTX HO */ + float enerBuffer[CPE_CHANNELS][CLDFB_NO_CHANNELS_MAX]; /* energy buffer */ + float currFlatness[CPE_CHANNELS]; /* flatness parameter */ + float fft_buff[CPE_CHANNELS][2 * L_FFT]; /* FFT buffer */ + int16_t tdm_ratio_idx, tdm_ratio_idx_SM; /* temp. TD stereo parameters */ + int16_t tdm_SM_or_LRTD_Pri; /* temp. TD stereo parameters */ + float tdm_last_ratio; /* temp. TD stereo parameters */ + int16_t nb_bits; /* number of DFT stereo side bits */ + float fr_bands[CPE_CHANNELS][2 * NB_BANDS]; /* energy in frequency bands */ + float Etot_LR[CPE_CHANNELS]; /* total energy */ + float lf_E[CPE_CHANNELS][2 * VOIC_BINS]; /* per bin spectrum energy in lf */ + int16_t localVAD_HE_SAD[CPE_CHANNELS]; /* HE-SAD flag without hangover, LR channels */ + float band_energies_LR[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */ + float orig_input[CPE_CHANNELS][L_FRAME48k]; + float Etot_last[CPE_CHANNELS]; + int32_t tmp, input_Fs; + int16_t max_bwidth, ivas_format; + ENCODER_CONFIG_HANDLE hEncoderConfig; + int32_t ivas_total_brate; + ivas_error error; + int32_t cpe_brate; + int32_t element_brate_ref; + int16_t last_bits_frame_nominal; /* last_bits_frame_nominal for M or PCh channel */ + + error = IVAS_ERR_OK; + push_wmops( "ivas_cpe_enc" ); + + hCPE = st_ivas->hCPE[cpe_id]; + sts = hCPE->hCoreCoder; + hEncoderConfig = st_ivas->hEncoderConfig; + max_bwidth = hEncoderConfig->max_bwidth; + ivas_format = hEncoderConfig->ivas_format; + input_Fs = hEncoderConfig->input_Fs; + ivas_total_brate = hEncoderConfig->ivas_total_brate; + element_brate_ref = hCPE->element_brate; + last_bits_frame_nominal = sts[0]->bits_frame_nominal; + + /*------------------------------------------------------------------* + * Initialization - general + 
*-----------------------------------------------------------------*/ + + tdm_SM_or_LRTD_Pri = 0; + tdm_ratio_idx = -1; + tdm_ratio_idx_SM = -1; + tdm_last_ratio = 0; + + + /*------------------------------------------------------------------* + * CPE initialization - core coder + *-----------------------------------------------------------------*/ + + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + sts[n]->idchan = n; + sts[n]->core = -1; + sts[n]->core_brate = -1; /* updated in dtx() */ + sts[n]->max_bwidth = max_bwidth; + if ( st_ivas->hMCT == NULL ) /*already updated before CPE call*/ + { + sts[n]->input_bwidth = sts[n]->last_input_bwidth; /* updated in BWD */ + sts[n]->bwidth = sts[n]->last_bwidth; /* updated in BWD */ + } + sts[n]->rate_switching_reset = 0; + } + mvr2r( data_f_ch0, sts[0]->input, input_frame ); + if ( data_f_ch1 != NULL ) /*this may happen for cases with odd number of channels*/ + { + mvr2r( data_f_ch1, sts[1]->input, input_frame ); + } + + /*----------------------------------------------------------------* + * Stereo technology selection + * Front-VAD on input L and R channels + *----------------------------------------------------------------*/ + + if ( sts[0]->ini_frame > 0 && st_ivas->hMCT == NULL ) + { + hCPE->element_mode = select_stereo_mode( hCPE, ivas_format ); + } + + stereo_mode_combined_format_enc( st_ivas, hCPE ); + + if ( ( error = front_vad( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_LR, NULL, NULL ) ) != IVAS_ERR_OK ) + { + return error; + } + + sts[0]->element_mode = hCPE->element_mode; + sts[1]->element_mode = hCPE->element_mode; + + n_CoreChannels = 2; + if ( hCPE->element_mode == IVAS_CPE_DFT ) + { + n_CoreChannels = 1; /* in DFT stereo, only M channel is coded */ + + sts[1]->vad_flag = 0; + } + + /*----------------------------------------------------------------* + * dynamically allocate data structures 
depending on the actual stereo mode + *----------------------------------------------------------------*/ + + if ( ( error = stereo_memory_enc( hCPE, input_Fs, max_bwidth, &tdm_last_ratio, ivas_format, st_ivas->nchan_transport ) ) != IVAS_ERR_OK ) + { + return error; + } + + + /*----------------------------------------------------------------* + * Set TD stereo parameters + *----------------------------------------------------------------*/ + + if ( ( error = stereo_set_tdm( hCPE, input_frame ) ) != IVAS_ERR_OK ) + { + return error; + } + + /*----------------------------------------------------------------* + * Resets/updates in case of stereo switching + *----------------------------------------------------------------*/ + + stereo_switching_enc( hCPE, sts[0]->old_input_signal, input_frame ); + + /*----------------------------------------------------------------* + * Temporal inter-channel alignment, stereo adjustment + *----------------------------------------------------------------*/ + + stereo_tca_enc( hCPE, input_frame ); + + /*----------------------------------------------------------------* + * Input signal buffering - needed in IC-BWE and TD ITD in MDCT stereo + *----------------------------------------------------------------*/ + + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + mvr2r( sts[n]->input, orig_input[n], input_frame ); + + if ( hCPE->hStereoICBWE != NULL ) + { + hCPE->hStereoICBWE->dataChan[n] = &orig_input[n][0]; + } + } + + /*---------------------------------------------------------------* + * Time Domain Transient Detector + *---------------------------------------------------------------*/ + + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + if ( sts[n]->hTranDet == NULL ) + { + currFlatness[n] = 0; + continue; + } + + if ( !( ivas_format == MC_FORMAT && st_ivas->mc_mode == MC_MODE_PARAMMC ) ) + { + RunTransientDetection( sts[n]->input, input_frame, sts[n]->hTranDet ); + } + currFlatness[n] = GetTCXAvgTemporalFlatnessMeasure( sts[n]->hTranDet, 
NSUBBLOCKS, 0 ); + } + + /* Synchonize detection for downmix-based stereo */ + if ( hCPE->element_mode == IVAS_CPE_DFT || hCPE->element_mode == IVAS_CPE_TD ) + { + set_transient_stereo( hCPE, currFlatness ); + } + + /*----------------------------------------------------------------* + * Configuration of stereo encoder + *----------------------------------------------------------------*/ + + for ( n = 0; n < n_CoreChannels; n++ ) + { + /* Force to MODE1 in IVAS */ + sts[n]->codec_mode = MODE1; + + sts[n]->element_mode = hCPE->element_mode; + } + + + if ( hCPE->element_mode != IVAS_CPE_MDCT && ( hCPE->element_brate != hCPE->last_element_brate || hCPE->last_element_mode != hCPE->element_mode || sts[0]->ini_frame == 0 || + ( ivas_total_brate != st_ivas->hEncoderConfig->last_ivas_total_brate ) || sts[0]->last_core_brate <= SID_2k40 ) ) /* If the last frame was SID or NO_DATA, we need to run stereo_dft_config here since VAD decision is not known yet */ + { + if ( st_ivas->hQMetaData != NULL ) + { + if ( ivas_format == MASA_ISM_FORMAT && st_ivas->ism_mode != ISM_MODE_NONE ) + { + stereo_dft_config( hCPE->hStereoDft == NULL ? NULL : hCPE->hStereoDft->hConfig, (int32_t) ( 0.70f * st_ivas->hQMetaData->bits_frame_nominal * FRAMES_PER_SEC ), &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal ); + } + else + { + stereo_dft_config( hCPE->hStereoDft == NULL ? NULL : hCPE->hStereoDft->hConfig, st_ivas->hQMetaData->bits_frame_nominal * FRAMES_PER_SEC, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal ); + } + } + else + { + /* note; "bits_frame_nominal" needed in TD stereo as well */ + stereo_dft_config( hCPE->hStereoDft == NULL ? 
NULL : hCPE->hStereoDft->hConfig, hCPE->element_brate, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal ); + } + } + + if ( hCPE->element_mode == IVAS_CPE_TD ) + { + if ( hCPE->hStereoTD->tdm_LRTD_flag ) + { + sts[0]->bits_frame_nominal = (int16_t) ( ( hCPE->element_brate >> 1 ) / FRAMES_PER_SEC ); + sts[1]->bits_frame_nominal = (int16_t) ( ( hCPE->element_brate >> 1 ) / FRAMES_PER_SEC ); + } + else + { + stereo_dft_config( NULL, hCPE->element_brate, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal ); + } + } + + if ( hCPE->element_mode == IVAS_CPE_MDCT ) + { + /* compute bit-rate surplus per channel in combined format coding */ + int32_t brate_surplus[CPE_CHANNELS]; + if ( st_ivas->ism_mode == ISM_MASA_MODE_MASA_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_PARAM_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_DISC ) + { + brate_surplus[0] = ( ( hCPE->brate_surplus / FRAMES_PER_SEC ) >> 1 ) * FRAMES_PER_SEC; + brate_surplus[1] = hCPE->brate_surplus - brate_surplus[0]; + } + + /* this is just for initialization, the true values of "total_brate" and "bits_frame_channel" are set later */ + for ( n = 0; n < n_CoreChannels; n++ ) + { + if ( st_ivas->hMCT ) + { + int16_t lfe_bits; + lfe_bits = ( ivas_format == MC_FORMAT && st_ivas->mc_mode == MC_MODE_MCT ? 
st_ivas->hLFE->lfe_bits : 0 ); + sts[n]->total_brate = hCPE->element_brate; + sts[n]->bits_frame_nominal = (int16_t) ( hCPE->element_brate / FRAMES_PER_SEC ); + sts[n]->bits_frame_channel = (int16_t) ( ( ivas_total_brate / FRAMES_PER_SEC - lfe_bits - nb_bits_metadata ) / st_ivas->hMCT->nchan_out_woLFE ); + } + else + { + sts[n]->bits_frame_nominal = (int16_t) ( hCPE->element_brate / FRAMES_PER_SEC ); + sts[n]->bits_frame_channel = (int16_t) ( ( hCPE->element_brate / FRAMES_PER_SEC ) / n_CoreChannels ); + sts[n]->total_brate = hCPE->element_brate / n_CoreChannels; + + /* subtract bit-rate for combined format coding */ + if ( st_ivas->ism_mode == ISM_MASA_MODE_MASA_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_PARAM_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_DISC ) + { + sts[n]->bits_frame_channel += (int16_t) ( brate_surplus[n] / FRAMES_PER_SEC ); + sts[n]->total_brate += brate_surplus[n]; + } + } + } + + /* reconfiguration in case of bitrate switching */ + if ( hCPE->element_brate != hCPE->last_element_brate && st_ivas->hMCT == NULL ) + { + initMdctStereoEncData( hCPE->hStereoMdct, ivas_format, hCPE->element_mode, hCPE->element_brate, max_bwidth, 0, NULL, 0 ); + hCPE->hStereoMdct->isSBAStereoMode = ( ( ivas_format == SBA_FORMAT || ivas_format == SBA_ISM_FORMAT ) && ( st_ivas->nchan_transport == 2 ) ); + } + } + + /*----------------------------------------------------------------* + * Stereo processing + * Stereo down-mix + *----------------------------------------------------------------*/ + + if ( hCPE->element_mode == IVAS_CPE_DFT ) + { + stereo_dft_hybrid_ITD_flag( hCPE->hStereoDft->hConfig, input_Fs, hCPE->hStereoDft->hItd->hybrid_itd_max ); + + /* Time Domain ITD compensation using extrapolation */ + stereo_td_itd( hCPE->hStereoDft->hItd, hCPE->hStereoDft->input_mem_itd, hCPE->hStereoDft->hConfig->hybrid_itd_flag, hCPE->hStereoDft->dft_ovl, sts, input_frame, hCPE->input_mem ); + + /* DFT on right and left input channels */ + stereo_dft_enc_analyze( sts, 
CPE_CHANNELS, input_frame, hCPE->hStereoDft, NULL, hCPE->hStereoDft->DFT, hCPE->input_mem ); + + sts[0]->total_brate = ( sts[0]->bits_frame_nominal + 10 ) * FRAMES_PER_SEC; /* add small overhead; st[0]->total_brate used in coder_type_modif() */ + /* Update DFT Stereo memories */ + stereo_dft_enc_update( hCPE->hStereoDft, sts[0]->max_bwidth ); + + /* DFT stereo processing */ + stereo_dft_enc_process( hCPE, vad_flag_dtx, vad_hover_flag, input_frame ); + } + else if ( hCPE->element_mode == IVAS_CPE_TD ) + { + /* Determine the energy ratio between the 2 channels */ + tdm_ratio_idx = stereo_tdm_ener_analysis( + ivas_format, + hCPE, input_frame, &tdm_SM_or_LRTD_Pri, &tdm_ratio_idx_SM ); + + /* Compute the downmix signal based on the ratio index */ + stereo_tdm_downmix( hCPE->hStereoTD, sts[0]->input, sts[1]->input, input_frame, tdm_ratio_idx, ( ( hCPE->hStereoTD->tdm_LRTD_flag == 0 ) ? tdm_SM_or_LRTD_Pri : 0 ), tdm_ratio_idx_SM ); + + /* signal the bitrate for BW selection in the SCh */ + sts[0]->bits_frame_channel = 0; + sts[1]->bits_frame_channel = (int16_t) ( hCPE->element_brate / FRAMES_PER_SEC ); + sts[1]->bits_frame_channel += (int16_t) ( hCPE->brate_surplus / FRAMES_PER_SEC ); + if ( st_ivas->hQMetaData != NULL ) + { + sts[1]->bits_frame_channel -= st_ivas->hQMetaData->metadata_max_bits; + } + + Etot_last[0] = sts[0]->hNoiseEst->Etot_last; + Etot_last[1] = sts[1]->hNoiseEst->Etot_last; + } + else if ( hCPE->element_mode == IVAS_CPE_MDCT ) + { + stereo_td_itd_mdct_stereo( hCPE, vad_flag_dtx, vad_hover_flag, input_frame ); + } + + /*----------------------------------------------------------------* + * DFT stereo: iDFT and resampling on both channels + *----------------------------------------------------------------*/ + + if ( hCPE->element_mode == IVAS_CPE_DFT ) + { + int32_t internal_Fs; + + internal_Fs = getTcxonly_ivas( IVAS_CPE_MDCT, sts[0]->bits_frame_nominal * FRAMES_PER_SEC, 0, sts[0]->is_ism_format ) == 0 ? 
INT_FS_16k : max( INT_FS_16k, sts[0]->sr_core ); + + /* iDFT at input sampling rate */ + stereo_dft_enc_synthesize( hCPE->hStereoDft, sts[0]->input, 0, input_Fs, input_Fs, 0 ); + + /* iDFT & resampling to 12.8kHz internal sampling rate */ + stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_12k8[0] + L_INP_MEM, 0, input_Fs, INT_FS_12k8, 0 ); + + /* iDFT & resampling to 16kHz internal sampling rate for M channel */ + if ( input_Fs == internal_Fs ) + { + mvr2r( sts[0]->input - STEREO_DFT_OVL_16k, old_inp_16k[0] + L_INP_MEM - STEREO_DFT_OVL_16k, input_frame + STEREO_DFT_OVL_16k ); + } + else + { + stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_16k[0] + L_INP_MEM, 0, input_Fs, internal_Fs, 0 ); + } + + /* DFT Stereo: iDFT of residual signal at 8kHz sampling rate */ + if ( hCPE->element_mode == IVAS_CPE_DFT && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] ) + { + mvr2r( sts[1]->old_inp_12k8, old_inp_12k8[1], L_INP_MEM ); + stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_12k8[1] + L_INP_MEM, 1, input_Fs, 8000, 0 ); + + /* update old input signal buffer */ + mvr2r( old_inp_12k8[1] + L_FRAME8k, sts[1]->old_inp_12k8, L_INP_MEM ); + } + + /* no iDFT at input sampling rate for Side channel -> reset the buffer */ + set_zero( sts[1]->input, input_frame ); + } + + + /*----------------------------------------------------------------* + * Front Pre-processing + *----------------------------------------------------------------*/ + + for ( n = 0; n < n_CoreChannels; n++ ) + { + error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n], + &ener[n], &relE[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], &vad_hover_flag[n], &attack_flag[n], + realBuffer[n], imagBuffer[n], old_wsp[n], pitch_fr[n], voicing_fr[n], &loc_harm[n], &cor_map_sum[n], &vad_flag_dtx[n], enerBuffer[n], + fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, 
st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_format, st_ivas->hMCT != NULL, ivas_total_brate ); + if ( error != IVAS_ERR_OK ) + { + return error; + } + } + + /* sanity check -> DTX not supported for more than one SCEs/CPEs */ + if ( st_ivas->nSCE + st_ivas->nCPE > 1 ) + { + if ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA ) + { + sts[0]->core_brate = -1; + sts[0]->total_brate = hCPE->element_brate; + } + } + + /*----------------------------------------------------------------* + * Stereo DTX updates + *----------------------------------------------------------------*/ + + if ( ivas_format == MASA_FORMAT && nb_bits_metadata > 0 && hCPE->hCoreCoder[0]->Opt_DTX_ON ) + { + if ( hCPE->element_mode == IVAS_CPE_DFT || hCPE->element_mode == IVAS_CPE_TD ) + { + reset_metadata_spatial( ivas_format, hCPE->hMetaData, hCPE->element_brate, &tmp, sts[0]->core_brate, nb_bits_metadata ); + } + } + + /* MDCT stereo DTX: active/inactive frame decision; compute FD CNG coherence */ + if ( hCPE->element_mode == IVAS_CPE_MDCT && hEncoderConfig->Opt_DTX_ON ) + { + stereoFdCngCoherence( sts, hCPE->last_element_mode, fft_buff ); + + /* Reset metadata */ + if ( sts[0]->cng_sba_flag || ( ivas_format == SBA_FORMAT ) ) + { + reset_metadata_spatial( ivas_format, hCPE->hMetaData, hCPE->element_brate, &tmp, sts[0]->core_brate, nb_bits_metadata ); + } + } + + /*----------------------------------------------------------------* + * Core codec configuration + *----------------------------------------------------------------*/ + + /* IGF reconfiguration */ + for ( n = 0; n < n_CoreChannels; n++ ) + { + if ( ( hCPE->last_element_brate != hCPE->element_brate || hCPE->element_mode != hCPE->last_element_mode || ( hCPE->element_mode == IVAS_CPE_TD && sts[0]->bits_frame_nominal != last_bits_frame_nominal ) || sts[n]->last_bwidth != sts[n]->bwidth ) && ( n == 0 || hCPE->element_mode == IVAS_CPE_MDCT ) ) + { + int16_t igf; + igf = getIgfPresent( 
sts[n]->element_mode, sts[n]->bits_frame_nominal * FRAMES_PER_SEC, sts[n]->max_bwidth, sts[n]->rf_mode ); + if ( ( error = IGF_Reconfig( &sts[n]->hIGFEnc, igf, 0, sts[n]->bits_frame_nominal * FRAMES_PER_SEC, sts[n]->max_bwidth, sts[n]->element_mode, sts[n]->rf_mode ) ) != IVAS_ERR_OK ) + { + return error; + } + } + } + + if ( hCPE->element_mode == IVAS_CPE_MDCT && st_ivas->hMCT == NULL ) + { + /* set coded BW for MDCT stereo */ + set_bw_stereo( hCPE ); + + /* reconfiguration of MDCT stereo */ + if ( sts[0]->bwidth != sts[0]->last_bwidth || ( ( hCPE->last_element_brate != hCPE->element_brate || hCPE->last_element_mode != hCPE->element_mode ) && sts[0]->bwidth != sts[0]->max_bwidth ) ) + { + initMdctStereoEncData( hCPE->hStereoMdct, ivas_format, hCPE->element_mode, hCPE->element_brate, sts[0]->bwidth, 0, NULL, 0 ); + hCPE->hStereoMdct->isSBAStereoMode = ( ( ivas_format == SBA_FORMAT || ivas_format == SBA_ISM_FORMAT ) && ( st_ivas->nchan_transport == 2 ) ); + + if ( hCPE->element_brate <= MAX_MDCT_ITD_BRATE && ivas_format == STEREO_FORMAT ) + { + if ( ( error = initMdctItdHandling( hCPE->hStereoMdct, input_Fs ) ) != IVAS_ERR_OK ) + { + return error; + } + } + } + } + + /* set ACELP@12k8 / ACELP@16k flag for flexible ACELP core */ + for ( n = 0; n < n_CoreChannels; n++ ) + { + if ( ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA ) && hCPE->element_mode == IVAS_CPE_DFT ) + { + sts[n]->flag_ACELP16k = set_ACELP_flag_IVAS( hCPE->element_mode, hCPE->element_brate, sts[n]->core_brate, n, sts[0]->tdm_LRTD_flag, sts[n]->bwidth, sts[n]->cng_type ); + } + else + { + sts[n]->flag_ACELP16k = set_ACELP_flag_IVAS( hCPE->element_mode, hCPE->element_brate, sts[n]->total_brate, n, sts[0]->tdm_LRTD_flag, sts[n]->bwidth, sts[n]->cng_type ); + } + } + + /* configure TD stereo encoder */ + if ( hCPE->element_mode == IVAS_CPE_TD ) + { + tdm_ol_pitch_comparison( hCPE, pitch_fr, voicing_fr ); + + tdm_configure_enc( ivas_format, st_ivas->ism_mode, hCPE, Etot_last, 
tdm_SM_or_LRTD_Pri, tdm_ratio_idx, tdm_ratio_idx_SM, attack_flag[0], nb_bits_metadata ); + + if ( hEncoderConfig->Opt_DTX_ON ) + { + stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, -1, NULL, sts[0]->hTdCngEnc->burst_ho_cnt, NULL ); + } + } + + /* modify the coder_type depending on the total_brate per channel */ + for ( n = 0; n < n_CoreChannels; n++ ) + { + if ( ( hCPE->element_mode != IVAS_CPE_DFT && hCPE->element_mode != IVAS_CPE_TD ) || n == 0 ) /* modify coder_type of primary channel */ + { + /* limit coder_type depending on the bitrate */ + coder_type_modif( sts[n], relE[n] ); + } + } + + /*----------------------------------------------------------------* + * Write IVAS format signaling in SID frames + *----------------------------------------------------------------*/ + + if ( sts[0]->core_brate == SID_2k40 ) + { + ivas_write_format_sid( ivas_format, hCPE->element_mode, sts[0]->hBstr ); + } + + /*----------------------------------------------------------------* + * DFT Stereo residual coding + * DFT Stereo parameters writing into the bitstream + *----------------------------------------------------------------*/ + + cpe_brate = 0; + if ( hCPE->element_mode == IVAS_CPE_DFT ) + { + if ( hEncoderConfig->Opt_DTX_ON ) + { + if ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA ) + { + /* Reconfigure DFT Stereo for inactive frames */ + if ( sts[0]->core_brate == SID_2k40 ) + { + stereo_dft_config( hCPE->hStereoDft->hConfig, IVAS_SID_5k2, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal ); + } + else + { + stereo_dft_config( hCPE->hStereoDft->hConfig, FRAME_NO_DATA, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal ); + } + + stereo_dft_cng_side_gain( hCPE->hStereoDft, hCPE->hStereoCng, sts[0]->core_brate, sts[0]->last_core_brate, sts[0]->bwidth ); + } + else + { + stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, hCPE->hStereoDft->nbands, hCPE->hStereoDft->sidSideGain, sts[0]->hTdCngEnc->burst_ho_cnt, 
&hCPE->hStereoDft->coh_fade_counter ); + } + } + + /* Write stereo bitstream */ + cpe_brate = st_ivas->hCPE[0]->element_brate; + + /* DFT stereo side bits */ + if ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && cpe_brate < MASA_STEREO_MIN_BITRATE && sts[0]->core_brate != SID_2k40 && sts[0]->core_brate != FRAME_NO_DATA ) + { + nb_bits = 0; /* Only mono downmix is transmitted in this case */ + } + else if ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA ) ) + { + nb_bits = hCPE->hMetaData->nb_bits_tot; + } + else + { + stereo_dft_enc_write_BS( hCPE, &nb_bits ); + } + + /* Residual coding in MDCT domain */ + if ( !( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA ) ) ) + { + int16_t max_bits = (int16_t) ( hCPE->element_brate / FRAMES_PER_SEC - 0.8f * sts[0]->bits_frame_nominal ); + if ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) + { + max_bits -= nb_bits_metadata; + if ( hCPE->brate_surplus < 0 ) + { + max_bits += (int16_t) ( hCPE->brate_surplus / FRAMES_PER_SEC ); + } + } + + stereo_dft_enc_res( hCPE->hStereoDft, old_inp_12k8[1] + L_INP_MEM - STEREO_DFT_OVL_8k, hCPE->hMetaData, &nb_bits, max_bits ); + } + + if ( sts[0]->core_brate == FRAME_NO_DATA || sts[0]->core_brate == SID_2k40 ) + { + assert( ( nb_bits <= ( ( IVAS_SID_5k2 - SID_2k40 ) / FRAMES_PER_SEC - SID_FORMAT_NBITS ) ) && "Stereo DFT CNG: bit budget is violated" ); + } + else + { + /* Flexible total bitrate in M channel */ + sts[0]->total_brate = hCPE->element_brate - ( nb_bits * FRAMES_PER_SEC ); + } + + /* subtract metadata bitbudget */ + sts[0]->total_brate -= ( nb_bits_metadata * FRAMES_PER_SEC ); + + /* subtract bit-rate for combined format coding */ + if ( ivas_format == MASA_ISM_FORMAT && ( st_ivas->ism_mode == ISM_MASA_MODE_MASA_ONE_OBJ || st_ivas->ism_mode == 
ISM_MASA_MODE_PARAM_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_DISC ) ) + { + sts[0]->total_brate += hCPE->brate_surplus; + } + } + + + /*----------------------------------------------------------------* + * Core Encoder + *----------------------------------------------------------------*/ + + if ( ( error = ivas_core_enc( NULL, hCPE, st_ivas->hMCT, n_CoreChannels, old_inp_12k8, old_inp_16k, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, tdm_SM_or_LRTD_Pri, ivas_format, 0 ) ) != IVAS_ERR_OK ) + { + return error; + } + + /*----------------------------------------------------------------* + * Common updates + *----------------------------------------------------------------*/ + + hCPE->last_element_brate = hCPE->element_brate; + hCPE->last_element_mode = hCPE->element_mode; + + if ( ivas_format == MASA_ISM_FORMAT ) + { + hCPE->element_brate = element_brate_ref; + } + + if ( hCPE->element_mode == IVAS_CPE_MDCT && hCPE->hStereoMdct != NULL && hCPE->hStereoMdct->hItd != NULL ) + { + /* update input samples buffer */ + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + mvr2r( orig_input[n], sts[n]->old_input_signal, input_frame ); + } + } + else if ( hCPE->element_mode == IVAS_CPE_DFT ) + { + mvr2r( sts[0]->input, sts[0]->old_input_signal, input_frame ); + } + else if ( st_ivas->hMCT == NULL ) /* note: in MCT, input buffers are updated later in ivas_mct_enc() */ + { + /* update input samples buffer */ + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + mvr2r( sts[n]->input, sts[n]->old_input_signal, input_frame ); + } + } + + if ( hCPE->hFrontVad[0] != NULL ) + { + hCPE->hFrontVad[0]->ini_frame++; + hCPE->hFrontVad[0]->ini_frame = min( hCPE->hFrontVad[0]->ini_frame, MAX_FRAME_COUNTER ); + } + + /* Store previous attack detection flag */ + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + sts[n]->hTranDet->transientDetector.prev_bIsAttackPresent = 
sts[n]->hTranDet->transientDetector.bIsAttackPresent; + } + + + pop_wmops(); + return error; +} +#endif /*------------------------------------------------------------------------- * create_cpe_enc() diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index fa8970e00..36fa5a0c7 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -152,7 +152,8 @@ typedef struct stereo_dft_enc_data_struct /*FFT*/ #ifdef IVAS_FLOAT_FIXED Word32 DFT_fx[CPE_CHANNELS][STEREO_DFT_N_MAX_ENC]; - Word16 DFT_q_fx[CLDFB_NO_CHANNELS_MAX]; + Word16 DFT_fx_e[CPE_CHANNELS]; + // Word16 DFT_q_fx[CLDFB_NO_CHANNELS_MAX]; #endif float DFT[CPE_CHANNELS][STEREO_DFT_N_MAX_ENC]; int16_t dft_ovl; /* Overlap size */ @@ -307,6 +308,12 @@ typedef struct stereo_dft_enc_data_struct /*misc*/ float icbweRefEner; float lbEner; +#ifdef IVAS_FLOAT_FIXED + Word32 icbweRefEner_fx; + Word16 icbweRefEner_fx_e; + Word32 lbEner_fx; + Word16 lbEner_fx_e; +#endif int16_t flip_sign; Word32 dmx_res_all_prev_fx; /* energy of the previous frame Q31*/ Word16 switch_fade_factor_fx; /* Adaptive fade factor for switch frame Q15*/ @@ -352,7 +359,7 @@ typedef struct stereo_dft_enc_data_struct const Word16 *dft_trigo_16k_fx; const Word16 *dft_trigo_32k_fx; - Word32 output_mem_res_8k_fx[STEREO_DFT_OVL_8k]; + Word32 output_mem_res_8k_fx[STEREO_DFT_OVL_8k]; // Q16 Word32 res_cod_NRG_M_fx[STEREO_DFT_BAND_MAX]; Word32 res_cod_NRG_S_fx[STEREO_DFT_BAND_MAX]; @@ -362,11 +369,11 @@ typedef struct stereo_dft_enc_data_struct Word32 past_nrgR_fx[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX]; Word32 past_dot_prod_real_fx[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX]; Word32 past_dot_prod_imag_fx[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX]; - Word32 output_mem_dmx_fx[STEREO_DFT_OVL_MAX]; - Word32 output_mem_dmx_12k8_fx[STEREO_DFT_OVL_12k8]; - Word32 output_mem_dmx_16k_fx[STEREO_DFT_OVL_16k]; /*can hold 16, 12.8 or 32kHz signals*/ - Word32 output_mem_dmx_32k_fx[STEREO_DFT_OVL_32k]; /*can hold 16, 12.8 or 32kHz 
signals*/ - Word32 output_mem_dmx_16k_shb_fx[STEREO_DFT_OVL_16k]; + Word32 output_mem_dmx_fx[STEREO_DFT_OVL_MAX]; // Q16 + Word32 output_mem_dmx_12k8_fx[STEREO_DFT_OVL_12k8]; // Q16 + Word32 output_mem_dmx_16k_fx[STEREO_DFT_OVL_16k]; /*can hold 16, 12.8 or 32kHz signals*/ // Q16 + Word32 output_mem_dmx_32k_fx[STEREO_DFT_OVL_32k]; /*can hold 16, 12.8 or 32kHz signals*/ // Q16 + Word32 output_mem_dmx_16k_shb_fx[STEREO_DFT_OVL_16k]; // Q16 Word32 input_mem_itd_fx[CPE_CHANNELS][STEREO_DFT_OVL_MAX]; Word32 gipd_fx[STEREO_DFT_ENC_DFT_NB]; diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index 46db4d6a5..8fb7d28c8 100644 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -421,6 +421,55 @@ static void stereo_dft_enc_open( set_zero( hStereoDft->output_mem_dmx_16k_shb, STEREO_DFT_OVL_16k ); set_zero( hStereoDft->output_mem_res_8k, STEREO_DFT_OVL_8k ); +#ifdef IVAS_FLOAT_FIXED + hStereoDft->dft_trigo_8k_fx = dft_trigo_32k_fx; + hStereoDft->dft_trigo_12k8_fx = dft_trigo_12k8_fx; + hStereoDft->dft_trigo_16k_fx = dft_trigo_32k_fx; + hStereoDft->dft_trigo_32k_fx = dft_trigo_32k_fx; + + hStereoDft->win_ana_8k_fx = win_ana_8k_fx; + hStereoDft->win_ana_12k8_fx = win_ana_12k8_fx; + hStereoDft->win_ana_16k_fx = win_ana_16k_fx; + hStereoDft->win_ana_32k_fx = win_ana_32k_fx; + + hStereoDft->win_8k_fx = win_syn_8k_fx; + hStereoDft->win_12k8_fx = win_syn_12k8_fx; + hStereoDft->win_16k_fx = win_syn_16k_fx; + hStereoDft->win_32k_fx = win_syn_32k_fx; + + IF( EQ_32( input_Fs, 16000 ) ) + { + hStereoDft->dft_trigo_fx = dft_trigo_32k_fx; + hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP; + hStereoDft->win_ana_fx = win_ana_16k_fx; + hStereoDft->win_fx = win_syn_16k_fx; + } + ELSE IF( EQ_32( input_Fs, 32000 ) ) + { + hStereoDft->dft_trigo_fx = dft_trigo_32k_fx; + hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_32k_STEP; + hStereoDft->win_ana_fx = win_ana_32k_fx; + hStereoDft->win_fx = win_syn_32k_fx; + } + ELSE + { + assert( 
EQ_32( input_Fs, 48000 ) ); + hStereoDft->dft_trigo_fx = dft_trigo_48k_fx; + hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_48k_STEP; + hStereoDft->win_ana_fx = win_ana_48k_fx; + hStereoDft->win_fx = win_syn_48k_fx; + } + + hStereoDft->win_mdct_8k_fx = win_mdct_8k_fx; + + /*I/O Buffers*/ + set_zero_fx( hStereoDft->output_mem_dmx_fx, STEREO_DFT_OVL_MAX ); + set_zero_fx( hStereoDft->output_mem_dmx_12k8_fx, STEREO_DFT_OVL_12k8 ); + set_zero_fx( hStereoDft->output_mem_dmx_16k_fx, STEREO_DFT_OVL_16k ); + set_zero_fx( hStereoDft->output_mem_dmx_16k_shb_fx, STEREO_DFT_OVL_16k ); + set_zero_fx( hStereoDft->output_mem_res_8k_fx, STEREO_DFT_OVL_8k ); +#endif + /*Bands: find the number of bands, Nyquist freq. is not taken into account*/ NFFT_inner = STEREO_DFT_N_MAX_ENC * inner_frame_tbl[max_bwidth] / L_FRAME48k; #ifndef IVAS_FLOAT_FIXED @@ -461,17 +510,15 @@ static void stereo_dft_enc_open_fx( Word16 win[STEREO_DFT_OVL_MAX]; /*Sizes*/ - /* input_Fs / 48000 */ - Word16 input_Fs_48k = extract_l( Mpy_32_32( input_Fs, 44739 /* 1 / 48000 in Q31 */ ) ); - // input_Fs_48k = shr(input_Fs_48k, sub(15, div_e)); - - hStereoDft->N = i_mult( STEREO_DFT_HOP_MAX_ENC, input_Fs_48k ); // e = div_e + hStereoDft->N = extract_l( Mpy_32_32( input_Fs, 42949673 /* STEREO_DFT_HOP_MAX_ENC / 48000 in Q31 */ ) ); // e = div_e assert( ( ( input_Fs / FRAMES_PER_SEC ) / hStereoDft->N ) == 1 ); /*Init. 
DFT sizes*/ - hStereoDft->NFFT = i_mult( STEREO_DFT_N_MAX_ENC, input_Fs_48k ); // e = div_e - hStereoDft->dft_ovl = i_mult( STEREO_DFT_OVL_MAX, input_Fs_48k ); // e = div_e + // hStereoDft->NFFT = i_mult( STEREO_DFT_N_MAX_ENC, input_Fs_48k ); // e = div_e + hStereoDft->NFFT = extract_l( Mpy_32_32( input_Fs, 85899346 ) ); // e = div_e + // hStereoDft->dft_ovl = i_mult( STEREO_DFT_OVL_MAX, input_Fs ); // e = div_e + hStereoDft->dft_ovl = extract_l( Mpy_32_32( input_Fs, 18790482 ) ); // e = div_e mdct_window_sine_IVAS_updated( win_p, input_Fs, hStereoDft->dft_ovl, FULL_OVERLAP, IVAS_CPE_DFT ); // win_e = 15 FOR( Word16 i = 0; i < shr( STEREO_DFT_OVL_MAX, 1 ); i++ ) { @@ -1115,6 +1162,423 @@ void stereo_dft_enc_analyze( * Inverse DFT on a 20ms frame *-------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +Word32 stereo_dft_enc_synthesize_fx( + STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle */ + // float *output, /* o : output synthesis */ + Word32 *output_fx, /* o : output synthesis Q16 */ + Word16 *output_start_index, + Word16 *output_end_index, + const Word16 chan, /* i : channel number */ + const Word32 input_Fs, /* i : input sampling rate */ + const Word32 output_sampling_rate, /* i : output sampling rate */ + const Word16 L_frame, /* i : frame length at internal Fs */ + Word16 *nrg_out_fx_e ) +{ + Word16 i, j, sign; + // float *pDFT_in; + Word32 *pDFT_in_fx; + Word16 DFT_in_fx_e; + Word16 offset, NFFT, N, ovl, zp; + Word16 temp_exp; + // float fac; + Word32 fac_fx; + // float *mem; + Word32 *mem_fx; + // const float *trigo, *win, *win_ana; + const Word16 *trigo_fx; + const Word32 *win_fx, *win_ana_fx; + // float tmp[STEREO_DFT_N_MAX_ENC]; + Word32 tmp_fx[STEREO_DFT_N_MAX_ENC]; + // float nrg; + Word32 nrg_fx; + Word16 nrg_fx_e; + // float trigo_enc[STEREO_DFT_N_MAX_ENC / 2 + 1]; + Word16 trigo_enc_fx[STEREO_DFT_N_MAX_ENC / 2 + 1]; + Word16 trigo_step; + Word16 scal_fac; + + + 
/*-----------------------------------------------------------------* + * Initialization + *-----------------------------------------------------------------*/ + + IF( L_frame > 0 ) + { + assert( ( output_sampling_rate == 16000 ) && "High-band generation only possible at 16kHz!" ); + } + nrg_fx = 0; + move32(); + nrg_fx_e = 0; + move16(); + + hStereoDft->icbweRefEner_fx = 0; + move32(); + hStereoDft->icbweRefEner_fx_e = 0; + move16(); + hStereoDft->lbEner_fx = 0; + move32(); + hStereoDft->lbEner_fx_e = 0; + move16(); + + IF( chan == 0 ) + { + pDFT_in_fx = hStereoDft->DFT_fx[0]; + DFT_in_fx_e = hStereoDft->DFT_fx_e[0]; + move16(); + } + ELSE + { + pDFT_in_fx = hStereoDft->DFT_fx[1]; + DFT_in_fx_e = hStereoDft->DFT_fx_e[1]; + move16(); + } + + IF( EQ_32( output_sampling_rate, input_Fs ) ) + { + assert( chan == 0 ); + + NFFT = hStereoDft->NFFT; + move16(); + fac_fx = MAX_32; + move32(); + N = hStereoDft->N; + move16(); + ovl = hStereoDft->dft_ovl; + move16(); + zp = hStereoDft->dft_zp; + move16(); + trigo_fx = hStereoDft->dft_trigo_fx; + trigo_step = hStereoDft->dft_trigo_step; + move16(); + IF( L_frame > 0 ) + { + mem_fx = hStereoDft->output_mem_dmx_16k_shb_fx; + } + ELSE + { + mem_fx = hStereoDft->output_mem_dmx_fx; + } + win_fx = hStereoDft->win_fx; + win_ana_fx = hStereoDft->win_ana_fx; + + push_wmops( "DFT_synth_fs" ); + } + ELSE IF( EQ_32( output_sampling_rate, INT_FS_12k8 ) ) + { + assert( chan == 0 ); + + NFFT = STEREO_DFT_N_12k8_ENC; + move16(); + N = STEREO_DFT_HOP_12k8_ENC; + move16(); + zp = STEREO_DFT_ZP_12k8_ENC; + move16(); + // fac = (float) ( NFFT ) / (float) ( hStereoDft->NFFT ); + fac_fx = BASOP_Util_Divide3232_Scale_cadence( NFFT, hStereoDft->NFFT, &temp_exp ); + fac_fx = L_shl( fac_fx, temp_exp ); + ovl = STEREO_DFT_OVL_12k8; + move16(); + trigo_fx = hStereoDft->dft_trigo_12k8_fx; + trigo_step = STEREO_DFT_TRIGO_SRATE_12k8_STEP; + move16(); + mem_fx = hStereoDft->output_mem_dmx_12k8_fx; + win_fx = hStereoDft->win_12k8_fx; + win_ana_fx = 
hStereoDft->win_ana_12k8_fx; + + push_wmops( "DFT_synth_12k8" ); + } + ELSE IF( EQ_32( output_sampling_rate, 16000 ) ) + { + assert( chan == 0 ); + + NFFT = STEREO_DFT_N_16k_ENC; + move16(); + N = STEREO_DFT_HOP_16k_ENC; + move16(); + zp = STEREO_DFT_ZP_16k_ENC; + move16(); + // fac = (float) ( NFFT ) / (float) ( hStereoDft->NFFT ); + fac_fx = BASOP_Util_Divide3232_Scale_cadence( NFFT, hStereoDft->NFFT, &temp_exp ); + fac_fx = L_shl( fac_fx, temp_exp ); + ovl = STEREO_DFT_OVL_16k; + move16(); + trigo_fx = hStereoDft->dft_trigo_16k_fx; + trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP; + move16(); + IF( L_frame > 0 ) + { + mem_fx = hStereoDft->output_mem_dmx_16k_shb_fx; + + push_wmops( "DFT_synth_16k_shb" ); + } + ELSE + { + mem_fx = hStereoDft->output_mem_dmx_16k_fx; + + push_wmops( "DFT_synth_16k" ); + } + win_fx = hStereoDft->win_16k_fx; + win_ana_fx = hStereoDft->win_ana_16k_fx; + } + ELSE IF( EQ_32( output_sampling_rate, 32000 ) ) + { + assert( chan == 0 ); + + NFFT = STEREO_DFT_N_32k_ENC; + move16(); + N = STEREO_DFT_HOP_32k_ENC; + move16(); + zp = STEREO_DFT_ZP_32k_ENC; + move16(); + // fac = (float) ( NFFT ) / (float) ( hStereoDft->NFFT ); + fac_fx = BASOP_Util_Divide3232_Scale_cadence( NFFT, hStereoDft->NFFT, &temp_exp ); + fac_fx = L_shl( fac_fx, temp_exp ); + ovl = STEREO_DFT_OVL_32k; + move16(); + trigo_fx = hStereoDft->dft_trigo_32k_fx; + trigo_step = STEREO_DFT_TRIGO_SRATE_32k_STEP; + move16(); + mem_fx = hStereoDft->output_mem_dmx_32k_fx; + win_fx = hStereoDft->win_32k_fx; + win_ana_fx = hStereoDft->win_ana_32k_fx; + + push_wmops( "DFT_synth_32k" ); + } + ELSE IF( EQ_32( output_sampling_rate, 8000 ) ) + { + assert( chan == 1 ); + + NFFT = STEREO_DFT_N_8k_ENC; + move16(); + N = STEREO_DFT_HOP_8k_ENC; + move16(); + zp = STEREO_DFT_ZP_8k_ENC; + move16(); + // fac = (float) ( NFFT ) / (float) ( hStereoDft->NFFT ); + fac_fx = BASOP_Util_Divide3232_Scale_cadence( NFFT, hStereoDft->NFFT, &temp_exp ); + fac_fx = L_shl( fac_fx, temp_exp ); + ovl = 
STEREO_DFT_OVL_8k; + move16(); + trigo_fx = hStereoDft->dft_trigo_8k_fx; + trigo_step = STEREO_DFT_TRIGO_SRATE_8k_STEP; + move16(); + mem_fx = hStereoDft->output_mem_res_8k_fx; + win_fx = hStereoDft->win_8k_fx; + win_ana_fx = hStereoDft->win_ana_8k_fx; + + push_wmops( "DFT_synth_8k" ); + } + ELSE + { + assert( 0 && "DFT stereo: sampling rate not supported!" ); + NFFT = -1; /* to avoid compilation warning */ + move16(); + fac_fx = -1; /* to avoid compilation warning */ + move32(); + N = -1; /* to avoid compilation warning */ + move16(); + zp = -1; /* to avoid compilation warning */ + move16(); + win_fx = NULL; /* to avoid compilation warning */ + trigo_fx = NULL; /* to avoid compilation warning */ + trigo_step = -1; /* to avoid compilation warning */ + move16(); + ovl = -1; /* to avoid compilation warning */ + move16(); + mem_fx = NULL; /* to avoid compilation warning */ + win_ana_fx = NULL; /* to avoid compilation warning */ + } + + offset = 0; + move16(); + + FOR( i = 0; i < shr( NFFT, 2 ); i++ ) + { + trigo_enc_fx[i] = trigo_fx[imult1616( i, trigo_step )]; + move16(); + trigo_enc_fx[sub( shr( NFFT, 1 ), i )] = trigo_fx[imult1616( i, trigo_step )]; + move16(); + } + trigo_enc_fx[shr( NFFT, 2 )] = trigo_fx[imult1616( shr( NFFT, 2 ), trigo_step )]; + move16(); + + /*-----------------------------------------------------------------* + * Synthesizing & resampling + *-----------------------------------------------------------------*/ + + offset = negate( ovl ); + + test(); + IF( EQ_16( L_frame, L_FRAME ) || EQ_16( L_frame, L_FRAME16k ) ) + { + // for ( i = (int16_t) ( 200 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ); i < (int16_t) ( 400 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ); i++ ) + FOR( i = 320; i < 640; i++ ) + { + // hStereoDft->icbweRefEner += pDFT_in[2 * i] * pDFT_in[2 * i] + pDFT_in[2 * i + 1] * pDFT_in[2 * i + 1]; + hStereoDft->icbweRefEner_fx = BASOP_Util_Add_Mant32Exp( hStereoDft->icbweRefEner_fx, hStereoDft->icbweRefEner_fx_e, 
L_add( L_shr( Mpy_32_32( pDFT_in_fx[2 * i], pDFT_in_fx[2 * i] ), 1 ), L_shr( Mpy_32_32( pDFT_in_fx[2 * i + 1], pDFT_in_fx[2 * i + 1] ), 1 ) ), add( shl( DFT_in_fx_e, 1 ), 1 ), &hStereoDft->icbweRefEner_fx_e ); + move32(); + } + // for ( i = 0; i < (int16_t) ( 200 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ); i++ ) + FOR( i = 0; i < 320; i++ ) + { + // hStereoDft->lbEner += pDFT_in[2 * i] * pDFT_in[2 * i] + pDFT_in[2 * i + 1] * pDFT_in[2 * i + 1]; + hStereoDft->lbEner_fx = BASOP_Util_Add_Mant32Exp( hStereoDft->lbEner_fx, hStereoDft->lbEner_fx_e, L_add( L_shr( Mpy_32_32( pDFT_in_fx[2 * i], pDFT_in_fx[2 * i] ), 1 ), L_shr( Mpy_32_32( pDFT_in_fx[2 * i + 1], pDFT_in_fx[2 * i + 1] ), 1 ) ), add( shl( DFT_in_fx_e, 1 ), 1 ), &hStereoDft->lbEner_fx_e ); + move32(); + } + hStereoDft->icbweRefEner_fx = Mpy_32_32( hStereoDft->icbweRefEner_fx, fac_fx ); + move32(); + hStereoDft->lbEner_fx = Mpy_32_32( hStereoDft->lbEner_fx, fac_fx ); + move32(); + } + + /*Flip?*/ + set32_fx( tmp_fx, 0, STEREO_DFT_N_MAX_ENC ); + IF( EQ_16( L_frame, L_FRAME ) ) + { + /* 6 to 14 kHz SHB target signal*/ + j = 2; + move16(); + sign = hStereoDft->flip_sign; + move16(); + + // for ( i = (int16_t) ( 350 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ); i >= (int16_t) 150 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ); i-- ) + FOR( i = 560; i >= 240; i-- ) + { + /* alternate sign between frames for even starting index */ + tmp_fx[j++] = W_extract_l( W_mult0_32_32( sign, Mpy_32_32( pDFT_in_fx[2 * i], fac_fx ) ) ); + tmp_fx[j++] = W_extract_l( W_mult0_32_32( negate( sign ), Mpy_32_32( pDFT_in_fx[2 * i + 1], fac_fx ) ) ); + move32(); + move32(); + } + scal_fac = sub( L_norm_arr( tmp_fx, STEREO_DFT_N_MAX_ENC ), 1 ); + scale_sig32( tmp_fx, STEREO_DFT_N_MAX_ENC, scal_fac ); // Q16 + DFT_in_fx_e = sub( DFT_in_fx_e, scal_fac ); + + hStereoDft->flip_sign = negate( sign ); + move16(); + + // for ( i = 0; i <= (int16_t) 100 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ); i++ 
) + FOR( i = 0; i <= 160; i++ ) + { + nrg_fx = BASOP_Util_Add_Mant32Exp( nrg_fx, nrg_fx_e, Mpy_32_32( tmp_fx[2 * i], tmp_fx[2 * i] ), shl( DFT_in_fx_e, 1 ), &nrg_fx_e ); + nrg_fx = BASOP_Util_Add_Mant32Exp( nrg_fx, nrg_fx_e, Mpy_32_32( tmp_fx[2 * i + 1], tmp_fx[2 * i + 1] ), shl( DFT_in_fx_e, 1 ), &nrg_fx_e ); + } + } + ELSE IF( EQ_16( L_frame, L_FRAME16k ) ) + { + /* 7.5 - 15.5 kHz SHB target signal*/ + j = 2; + move16(); + // for ( i = (int16_t) ( 400 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ) - 1; i >= (int16_t) 200 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ); i-- ) + FOR( i = 640 - 1; i >= 320; i-- ) + { + tmp_fx[j++] = Mpy_32_32( pDFT_in_fx[2 * i], fac_fx ); + tmp_fx[j++] = L_negate( Mpy_32_32( pDFT_in_fx[2 * i + 1], fac_fx ) ); + move32(); + move32(); + } + scal_fac = sub( L_norm_arr( tmp_fx, STEREO_DFT_N_MAX_ENC ), 1 ); + scale_sig32( tmp_fx, STEREO_DFT_N_MAX_ENC, scal_fac ); // Q16 + DFT_in_fx_e = sub( DFT_in_fx_e, scal_fac ); + + // for ( i = 0; i <= (int16_t) 100 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ); i++ ) + FOR( i = 0; i <= 160; i++ ) + { + nrg_fx = BASOP_Util_Add_Mant32Exp( nrg_fx, nrg_fx_e, Mpy_32_32( tmp_fx[2 * i], tmp_fx[2 * i] ), shl( DFT_in_fx_e, 1 ), &nrg_fx_e ); + nrg_fx = BASOP_Util_Add_Mant32Exp( nrg_fx, nrg_fx_e, Mpy_32_32( tmp_fx[2 * i + 1], tmp_fx[2 * i + 1] ), shl( DFT_in_fx_e, 1 ), &nrg_fx_e ); + } + } + ELSE IF( NE_32( fac_fx, MAX_32 ) ) + { + /*Copy and scale*/ + tmp_fx[0] = Mpy_32_32( pDFT_in_fx[0], fac_fx ); + move32(); + tmp_fx[1] = 0; + move32(); + FOR( i = 2; i < NFFT; i++ ) + { + tmp_fx[i] = Mpy_32_32( pDFT_in_fx[i], fac_fx ); + move32(); + } + + IF( LT_32( fac_fx, MAX_32 ) ) + { + tmp_fx[1] = 0; /*Nyquist is set to 0*/ + tmp_fx[0] = 0; /*DC is set to 0*/ + move32(); + move32(); + } + scal_fac = sub( L_norm_arr( tmp_fx, STEREO_DFT_N_MAX_ENC ), 1 ); + scale_sig32( tmp_fx, STEREO_DFT_N_MAX_ENC, scal_fac ); // Q16 + DFT_in_fx_e = sub( DFT_in_fx_e, scal_fac ); + } + ELSE + { + FOR( i = 0; i 
< NFFT; i++ ) + { + tmp_fx[i] = pDFT_in_fx[i]; + move32(); + } + scal_fac = sub( L_norm_arr( tmp_fx, STEREO_DFT_N_MAX_ENC ), 1 ); + scale_sig32( tmp_fx, STEREO_DFT_N_MAX_ENC, scal_fac ); // Q16 + DFT_in_fx_e = sub( DFT_in_fx_e, scal_fac ); + } + + /*Reconstruct */ + /*IFFT*/ + scal_fac = sub( 1, find_guarded_bits_fx( NFFT ) ); + scale_sig32( tmp_fx, NFFT, scal_fac ); // Q16 + rfft_fx( tmp_fx, trigo_enc_fx, NFFT, +1 ); + Scale_sig32( tmp_fx, NFFT, sub( sub( DFT_in_fx_e, scal_fac ), 15 ) ); // Q16 + + FOR( i = 0; i < ovl; i++ ) + { + // output[offset + i] = mem[i] + tmp[zp + i] * win[i]; + output_fx[offset + i] = L_add_sat( mem_fx[i], Mpy_32_32( tmp_fx[zp + i], win_fx[i] ) ); + move32(); + // mem[i] = tmp[zp + N + i] * win[ovl - 1 - i]; + mem_fx[i] = Mpy_32_32( tmp_fx[zp + N + i], win_fx[ovl - 1 - i] ); + move32(); + } + + /*Middle->Copy*/ + FOR( i = 0; i < N - ovl; i++ ) + { + // output_fx[offset + ovl + i] = tmp[zp + ovl + i]; + output_fx[offset + ovl + i] = tmp_fx[zp + ovl + i]; + move32(); + } + + /*-----------------------------------------------------------------* + * Lookahead: redress signal + *-----------------------------------------------------------------*/ + + FOR( i = 0; i < ovl; i++ ) + { + Word32 ifft_deviation = tmp_fx[zp + N + ovl]; + move32(); + // output[offset + N + i] = ( tmp[zp + N + i] - ifft_deviation ) / win_ana[ovl - 1 - i] + ifft_deviation; + Word16 L_temp_e; + Word32 L_temp = BASOP_Util_Divide3232_Scale_cadence( L_sub_sat( tmp_fx[zp + N + i], ifft_deviation ), win_ana_fx[ovl - 1 - i], &L_temp_e ); + L_temp = L_shl_sat( L_temp, L_temp_e ); + output_fx[offset + N + i] = L_add_sat( L_temp, ifft_deviation ); // Q16 + move32(); + } + + *output_start_index = offset; + move16(); + *output_end_index = add( add( offset, ovl ), N ); + move16(); + IF( nrg_out_fx_e ) + { + *nrg_out_fx_e = nrg_fx_e; + move16(); + } + pop_wmops(); + return ( nrg_fx ); +} +#endif float stereo_dft_enc_synthesize( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder 
stereo handle */ float *output, /* o : output synthesis */ diff --git a/lib_enc/swb_pre_proc.c b/lib_enc/swb_pre_proc.c index 91dc8beea..3336ee336 100644 --- a/lib_enc/swb_pre_proc.c +++ b/lib_enc/swb_pre_proc.c @@ -44,6 +44,10 @@ #include "wmc_auto.h" #include "ivas_prot.h" #include "ivas_rom_enc.h" +#ifdef IVAS_FLOAT_FIXED +#include "ivas_prot_fx.h" +#include "prot_fx.h" +#endif /*-------------------------------------------------------------------* * Local constants @@ -270,6 +274,550 @@ void wb_pre_proc( * - Common SWB TBE and SWB BWE pre-processing *-------------------------------------------------------------------*/ + +#ifdef IVAS_FLOAT_FIXED +/* swb_pre_proc_ivas_fx(): common SWB TBE / SWB BWE pre-processing, intermediate float/fixed-point version (full implementation pending). Almost all processing is still done in float; only the two DFT-stereo synthesis steps call the fixed-point stereo_dft_enc_synthesize_fx(), each bracketed by float->fixed and fixed->float conversions of the hStereoDft buffers. In this version new_swb_speech_fx is written only by the 48 kHz IVAS_CPE_DFT resampling path, and its result is converted back into new_swb_speech. */ +void swb_pre_proc_ivas_fx( + Encoder_State *st, /* i/o: encoder state structure */ + float *new_swb_speech, /* o : original input signal at 32kHz */ + Word32 *new_swb_speech_fx, /* o : original input signal at 32kHz */ + float *shb_speech, /* o : SHB target signal (6-14kHz) at 16kHz */ + float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : real buffer */ + float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : imag buffer */ + CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */ +) +{ + int16_t Sample_Delay_SWB_BWE, inner_frame, delay; + TD_BWE_ENC_HANDLE hBWE_TD; + FD_BWE_ENC_HANDLE hBWE_FD; + int32_t inner_Fs, input_Fs; + float old_input[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k]; + Word32 old_input_fx[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k]; // fixed counterpart + float spchTmp[L_FRAME32k], spchTmp2[L_FRAME32k]; + int16_t i, j, L_resamp; + int16_t startB, endB; + float *realBufferFlipped[CLDFB_NO_COL_MAX]; + float *imagBufferFlipped[CLDFB_NO_COL_MAX]; + float realBufferTmp[CLDFB_NO_COL_MAX][20]; + float imagBufferTmp[CLDFB_NO_COL_MAX][20]; + int16_t ts, nB, uB; + float sign, lbEner, v, t, regression; + const float *thr, *regV; + int16_t Sample_Delay_SWB_BWE32k, lMemRecalc32k, dft_ovl32k; + +
lMemRecalc32k = NS2SA( 32000, L_MEM_RECALC_NS ); + + /* initialization */ + hBWE_TD = st->hBWE_TD; + hBWE_FD = st->hBWE_FD; + input_Fs = st->input_Fs; + + for ( j = 0; j < CLDFB_NO_COL_MAX; j++ ) + { + set_f( realBufferTmp[j], 0, 20 ); + set_f( imagBufferTmp[j], 0, 20 ); + realBufferFlipped[j] = realBufferTmp[j]; + imagBufferFlipped[j] = imagBufferTmp[j]; + } + + set_f( old_input, 0.0f, NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k ); + + if ( input_Fs == 32000 ) + { + if ( st->element_mode > EVS_MONO ) + { + Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS ); + if ( st->L_frame == L_FRAME16k ) + { + Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS ); + } + + mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE ); + mvr2r( st->input - L_FRAME32k, hBWE_FD->old_fdbwe_speech, L_FRAME32k ); + + if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB ) + { + mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ); + } + else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB ) + { + mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ); + } + } + + mvr2r( st->input, new_swb_speech, L_FRAME32k ); + + if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE && st->extl != SWB_BWE_HIGHRATE ) + { + Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ); + if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k ) + { + Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS ); + } + if ( st->element_mode > EVS_MONO ) + { + Sample_Delay_SWB_BWE -= NS2SA( 32000, DELAY_FIR_RESAMPL_NS ); + } + + mvr2r( hBWE_FD->old_fdbwe_speech,
&old_input[Sample_Delay_SWB_BWE], L_FRAME32k ); + + set_f( old_input, 0, Sample_Delay_SWB_BWE ); + mvr2r( hBWE_FD->old_fdbwe_speech + L_FRAME32k - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE ); + if ( st->extl != WB_BWE ) + { + mvr2r( old_input, hBWE_FD->old_wtda_swb, L_FRAME32k ); + } + } + + if ( st->extl != SWB_BWE && st->extl != FB_BWE ) + { + mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME32k ); + } + } + else /* 48 kHz */ + { + + Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS ); + Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS ); + if ( st->L_frame == L_FRAME16k ) + { + Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS ); + Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_16k_NS ); + } + + dft_ovl32k = 0; + if ( st->element_mode == IVAS_CPE_DFT ) + { + dft_ovl32k = (int16_t) ( STEREO_DFT_OVL_MAX * 32000 / 48000 ); + } + + if ( st->codec_mode == MODE1 ) + { + if ( st->element_mode > EVS_MONO ) + { + + if ( st->element_mode == IVAS_CPE_TD ) + { + } + else if ( st->bwidth == FB ) + { + mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE ); + } + + mvr2r( st->input - L_FRAME48k, hBWE_FD->old_fdbwe_speech, L_FRAME48k ); + + if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB ) + { + if ( st->bwidth == SWB ) + { + /* buffers hBWE_FD->old_input[] and hBWE_FD->old_wtda_swb[] need to be at 32 kHz (inner) sampling rate */ + + decimate_2_over_3_allpass( st->input - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, spchTmp, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp ); + + mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( lMemRecalc32k - Sample_Delay_SWB_BWE32k ), lMemRecalc32k - Sample_Delay_SWB_BWE32k ); + mvr2r( spchTmp + lMemRecalc32k - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k ); + } + else /* FB_BWE */ + { + mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME48k - (
hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ); + mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE ); + } + } + else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB ) + { + if ( st->bwidth == SWB ) + { + lerp_flt( st->input - hCPE->hStereoDft->dft_ovl, spchTmp, dft_ovl32k - Sample_Delay_SWB_BWE32k, hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ); + + mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( dft_ovl32k - Sample_Delay_SWB_BWE32k ), dft_ovl32k - Sample_Delay_SWB_BWE32k ); + } + else + { + mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ); + } + } + } + + if ( ( st->extl != SWB_BWE && st->extl != FB_BWE && st->core == ACELP_CORE ) || ( st->element_mode == IVAS_CPE_DFT && st->core != ACELP_CORE ) /*resampling not needed for MDCT cores*/ ) + { + /* move the resampling out of the TDBWE path as new_swb_speech is not needed for TDBWE.
*/ + mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME48k ); + } + else + { + if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE ) + { + /* resample 48 kHz to 32kHz */ + if ( st->last_bwidth == FB ) + { + inner_frame = L_FRAME48k; + inner_Fs = 48000; + mvr2r( hBWE_FD->old_fdbwe_speech, new_swb_speech, L_FRAME48k ); + } + else + { + inner_frame = L_FRAME32k; + inner_Fs = 32000; + + if ( st->element_mode != IVAS_CPE_DFT ) + { + decimate_2_over_3_allpass( hBWE_FD->old_fdbwe_speech, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp ); + } + else + { + lerp_flt( hBWE_FD->old_fdbwe_speech, new_swb_speech, inner_frame, L_FRAME48k ); + } + + if ( st->element_mode == IVAS_CPE_DFT && st->idchan == 0 ) + { + for ( i = 0; i < STEREO_DFT_OVL_32k; i++ ) + { + hCPE->hStereoDft->output_mem_dmx_32k[i] = new_swb_speech[inner_frame - STEREO_DFT_OVL_32k + i] * hCPE->hStereoDft->win_32k[STEREO_DFT_OVL_32k - 1 - i]; + } + } + } + + Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ); + if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k ) + { + Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS ); + } + if ( st->element_mode > EVS_MONO ) + { + Sample_Delay_SWB_BWE -= NS2SA( inner_Fs, DELAY_FIR_RESAMPL_NS ); + } + + mvr2r( new_swb_speech, &old_input[Sample_Delay_SWB_BWE], inner_frame ); + set_f( old_input, 0, Sample_Delay_SWB_BWE ); + mvr2r( new_swb_speech + inner_frame - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE ); + mvr2r( old_input, hBWE_FD->old_wtda_swb, inner_frame ); + } + + /* resample 48 kHz to 32kHz */ + if ( st->bwidth == FB ) + { + mvr2r( st->input, new_swb_speech, L_FRAME48k ); + } + else + { + if ( st->element_mode == IVAS_CPE_TD ) + { + float dec_2_over_3_mem_tmp[L_FILT_2OVER3], dec_2_over_3_mem_lp_tmp[L_FILT_2OVER3_LP]; + + decimate_2_over_3_allpass( st->input, L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, new_swb_speech,
hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp ); + + mvr2r( hBWE_TD->dec_2_over_3_mem, dec_2_over_3_mem_tmp, L_FILT_2OVER3 ); + mvr2r( hBWE_TD->dec_2_over_3_mem_lp, dec_2_over_3_mem_lp_tmp, L_FILT_2OVER3_LP ); + + decimate_2_over_3_allpass( st->input + L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, new_swb_speech + L_FRAME32k - lMemRecalc32k, dec_2_over_3_mem_tmp, dec_2_over_3_mem_lp_tmp ); + } + else if ( st->element_mode != IVAS_CPE_DFT ) + { + decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp ); + } + else /* IVAS_CPE_DFT */ + { + /*flt2fix: convert the hStereoDft DFT spectra, energies and overlap memories from float for the fixed-point synthesis call below*/ + f2me_buf( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], &hCPE->hStereoDft->DFT_fx_e[0], STEREO_DFT_N_MAX_ENC ); + f2me_buf( hCPE->hStereoDft->DFT[1], hCPE->hStereoDft->DFT_fx[1], &hCPE->hStereoDft->DFT_fx_e[1], STEREO_DFT_N_MAX_ENC ); + f2me( hCPE->hStereoDft->icbweRefEner, &hCPE->hStereoDft->icbweRefEner_fx, &hCPE->hStereoDft->icbweRefEner_fx_e ); + f2me( hCPE->hStereoDft->lbEner, &hCPE->hStereoDft->lbEner_fx, &hCPE->hStereoDft->lbEner_fx_e ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_res_8k, hCPE->hStereoDft->output_mem_res_8k_fx, 16, STEREO_DFT_OVL_8k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx, hCPE->hStereoDft->output_mem_dmx_fx, 16, STEREO_DFT_OVL_MAX ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_12k8, hCPE->hStereoDft->output_mem_dmx_12k8_fx, 16, STEREO_DFT_OVL_12k8 ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k, hCPE->hStereoDft->output_mem_dmx_16k_fx, 16, STEREO_DFT_OVL_16k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb, hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, 16, STEREO_DFT_OVL_16k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_32k, hCPE->hStereoDft->output_mem_dmx_32k_fx, 16, STEREO_DFT_OVL_32k ); + /*flt2fix end*/ + + Word16 out_start_ind, out_end_ind; + stereo_dft_enc_synthesize_fx( hCPE->hStereoDft,
new_swb_speech_fx, &out_start_ind, &out_end_ind, st->idchan, input_Fs, 32000, 0, NULL ); + + /*fix2flt: convert results back to float; only new_swb_speech_fx samples in [out_start_ind, out_end_ind) are converted*/ + hCPE->hStereoDft->icbweRefEner = me2f( hCPE->hStereoDft->icbweRefEner_fx, hCPE->hStereoDft->icbweRefEner_fx_e ); + hCPE->hStereoDft->lbEner = me2f( hCPE->hStereoDft->lbEner_fx, hCPE->hStereoDft->lbEner_fx_e ); + fixedToFloat_arrL( new_swb_speech_fx + out_start_ind, new_swb_speech + out_start_ind, 16, out_end_ind - out_start_ind ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_res_8k_fx, hCPE->hStereoDft->output_mem_res_8k, 16, STEREO_DFT_OVL_8k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_fx, hCPE->hStereoDft->output_mem_dmx, 16, STEREO_DFT_OVL_MAX ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_12k8_fx, hCPE->hStereoDft->output_mem_dmx_12k8, 16, STEREO_DFT_OVL_12k8 ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_fx, hCPE->hStereoDft->output_mem_dmx_16k, 16, STEREO_DFT_OVL_16k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, hCPE->hStereoDft->output_mem_dmx_16k_shb, 16, STEREO_DFT_OVL_16k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_32k_fx, hCPE->hStereoDft->output_mem_dmx_32k, 16, STEREO_DFT_OVL_32k ); + /*fix2flt end*/ + + mvr2r( new_swb_speech - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k ); + } + } + } + } + else + { + /* resample 48 kHz to 32kHz */ + if ( st->bwidth == FB ) + { + mvr2r( st->input, new_swb_speech, L_FRAME48k ); + } + else + { + decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp ); + } + } + } + + if ( ( st->core == ACELP_CORE && st->extl != SWB_BWE_HIGHRATE && st->extl != FB_BWE_HIGHRATE ) || + ( ( st->total_brate == ACELP_9k60 || st->rf_mode ) && st->bwidth == SWB && st->element_mode == EVS_MONO ) ) + { + float CldfbHB = 0; + Word32 CldfbHB_fx = 0; // fixed counterpart + Word16 CldfbHB_fx_e = 0; // fixed counterpart + + if ( st->element_mode == IVAS_CPE_DFT ) + { + +
/*flt2fix: same float-to-fixed conversion of the hStereoDft state as in the resampling path*/ + f2me_buf( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], &hCPE->hStereoDft->DFT_fx_e[0], STEREO_DFT_N_MAX_ENC ); + f2me_buf( hCPE->hStereoDft->DFT[1], hCPE->hStereoDft->DFT_fx[1], &hCPE->hStereoDft->DFT_fx_e[1], STEREO_DFT_N_MAX_ENC ); + f2me( hCPE->hStereoDft->icbweRefEner, &hCPE->hStereoDft->icbweRefEner_fx, &hCPE->hStereoDft->icbweRefEner_fx_e ); + f2me( hCPE->hStereoDft->lbEner, &hCPE->hStereoDft->lbEner_fx, &hCPE->hStereoDft->lbEner_fx_e ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_res_8k, hCPE->hStereoDft->output_mem_res_8k_fx, 16, STEREO_DFT_OVL_8k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx, hCPE->hStereoDft->output_mem_dmx_fx, 16, STEREO_DFT_OVL_MAX ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_12k8, hCPE->hStereoDft->output_mem_dmx_12k8_fx, 16, STEREO_DFT_OVL_12k8 ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k, hCPE->hStereoDft->output_mem_dmx_16k_fx, 16, STEREO_DFT_OVL_16k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb, hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, 16, STEREO_DFT_OVL_16k ); + floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_32k, hCPE->hStereoDft->output_mem_dmx_32k_fx, 16, STEREO_DFT_OVL_32k ); + /*flt2fix end*/ + + Word16 out_start_ind, out_end_ind; + CldfbHB_fx = stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, old_input_fx + STEREO_DFT_OVL_16k, &out_start_ind, &out_end_ind, st->idchan, input_Fs, 16000, st->L_frame, &CldfbHB_fx_e ); + + /*fix2flt: CldfbHB is rebuilt via me2f() from the returned value and exponent; only the synthesized samples in [out_start_ind, out_end_ind), relative to old_input_fx + STEREO_DFT_OVL_16k, are converted back*/ + CldfbHB = me2f( CldfbHB_fx, CldfbHB_fx_e ); + hCPE->hStereoDft->icbweRefEner = me2f( hCPE->hStereoDft->icbweRefEner_fx, hCPE->hStereoDft->icbweRefEner_fx_e ); + hCPE->hStereoDft->lbEner = me2f( hCPE->hStereoDft->lbEner_fx, hCPE->hStereoDft->lbEner_fx_e ); + fixedToFloat_arrL( old_input_fx + STEREO_DFT_OVL_16k + out_start_ind, old_input + STEREO_DFT_OVL_16k + out_start_ind, 16, out_end_ind - out_start_ind ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_res_8k_fx, hCPE->hStereoDft->output_mem_res_8k,
16, STEREO_DFT_OVL_8k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_fx, hCPE->hStereoDft->output_mem_dmx, 16, STEREO_DFT_OVL_MAX ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_12k8_fx, hCPE->hStereoDft->output_mem_dmx_12k8, 16, STEREO_DFT_OVL_12k8 ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_fx, hCPE->hStereoDft->output_mem_dmx_16k, 16, STEREO_DFT_OVL_16k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, hCPE->hStereoDft->output_mem_dmx_16k_shb, 16, STEREO_DFT_OVL_16k ); + fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_32k_fx, hCPE->hStereoDft->output_mem_dmx_32k, 16, STEREO_DFT_OVL_32k ); + /*fix2flt end*/ + + /* delay corresponding to CLDFB delay */ + mvr2r( old_input + STEREO_DFT_OVL_16k - 20, shb_speech, L_FRAME16k ); + mvr2r( old_input, hBWE_TD->old_speech_shb + L_LOOK_16k + L_SUBFR16k - ( STEREO_DFT_OVL_16k - 20 ), STEREO_DFT_OVL_16k - 20 ); + mvr2r( old_input, hCPE->hStereoICBWE->mem_shb_speech_ref, STEREO_DFT_OVL_16k - 20 ); + + if ( CldfbHB <= 0 ) + { + CldfbHB = 1.0f; + } + hBWE_TD->cldfbHBLT_flt = 0.9f * hBWE_TD->cldfbHBLT_flt + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) ); + + lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner ); + hCPE->hStereoICBWE->icbweRefEner = 0.05f * (float) sqrt( hCPE->hStereoDft->icbweRefEner ); + lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner ); /* NOTE(review): repeats the lbEner assignment two statements above; the second one looks redundant */ + thr = icbwe_thr_DFT; + regV = icbwe_regressionValuesDFT; + } + else + { + if ( st->L_frame == L_FRAME ) + { + startB = 34; + endB = 14; + for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ ) + { + for ( nB = startB, uB = 0; nB > endB; nB--, uB++ ) + { + sign = ( ts % 2 ) ?
1.0f : -1.0f; + realBufferFlipped[ts][uB] = -sign * realBuffer[ts][nB]; + imagBufferFlipped[ts][uB] = sign * imagBuffer[ts][nB]; + } + } + } + else + { + startB = 39; + endB = 19; + for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ ) + { + for ( nB = startB, uB = 0; nB > endB; nB--, uB++ ) + { + realBufferFlipped[ts][uB] = -realBuffer[ts][nB]; + imagBufferFlipped[ts][uB] = imagBuffer[ts][nB]; + } + } + } + + for ( nB = 0; nB < 10; nB++ ) + { + for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ ) + { + CldfbHB += ( realBufferFlipped[ts][nB] * realBufferFlipped[ts][nB] + imagBufferFlipped[ts][nB] * imagBufferFlipped[ts][nB] ); + } + } + if ( CldfbHB <= 0 ) + { + CldfbHB = 1.0f; + } + hBWE_TD->cldfbHBLT_flt = 0.9f * hBWE_TD->cldfbHBLT_flt + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) ); + + if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL ) + { + hCPE->hStereoICBWE->icbweRefEner = EPSILON; + for ( nB = 20; nB < 40; nB++ ) + { + for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ ) + { + hCPE->hStereoICBWE->icbweRefEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] ); + } + } + hCPE->hStereoICBWE->icbweRefEner = 0.05f * sqrtf( hCPE->hStereoICBWE->icbweRefEner ); + } + + lbEner = EPSILON; + for ( nB = 0; nB < 20; nB++ ) + { + for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ ) + { + lbEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] ); + } + } + lbEner = 0.05f * sqrtf( lbEner ); + thr = icbwe_thr_TDM; + regV = icbwe_regressionValuesTDM; + + cldfbSynthesis_ivas( realBufferFlipped, imagBufferFlipped, shb_speech, -1, st->cldfbSynTd ); + } + + if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL ) + { + hCPE->hStereoICBWE->MSFlag = 0; /* Init the multi-source flag */ + v = 0.3333f * sum_f( st->voicing, 3 ); + t = log10f( ( hCPE->hStereoICBWE->icbweRefEner + 1e-6f ) / ( lbEner + 1e-6f ) ); + + /* Three Level Decision Tree to calculate a regression value first */ + if
( t < thr[0] ) /* level 1 */ + { + if ( t < thr[1] ) /* level 2 */ + { + regression = ( v < thr[3] ) ? regV[0] : regV[1]; /* level 3 */ + } + else + { + regression = ( v < thr[4] ) ? regV[2] : regV[3]; /* level 3 */ + } + } + else + { + if ( t < thr[2] ) /* level 2 */ + { + regression = ( v < thr[5] ) ? regV[4] : regV[5]; /* level 3 */ + } + else + { + regression = ( v < thr[6] ) ? regV[6] : regV[7]; /* level 3 */ + } + } + + /* Convert the regression to a hard decision (classification) */ + if ( regression > 0.79f && !( st->bwidth < SWB || hCPE->hCoreCoder[0]->vad_flag == 0 ) ) + { + hCPE->hStereoICBWE->MSFlag = 1; + } + } + + if ( st->extl != WB_TBE && st->extl != SWB_TBE && st->extl != FB_TBE ) + { + /* Update the previous superwideband speech buffer in case of a SWB_BWE frame - this code is in swb_tbe_enc */ + delay = L_LOOK_16k + L_SUBFR16k; + mvr2r( shb_speech + L_FRAME16k - delay, hBWE_TD->old_speech_shb, delay ); + } + } + else + { + if ( ( st->bwidth == FB || st->core == ACELP_CORE ) && ( st->element_mode == EVS_MONO ) ) + { + InitSWBencBufferStates( st->hBWE_TD, shb_speech ); + } + else + { + if ( st->element_mode == IVAS_CPE_DFT ) + { + if ( st->L_frame == L_FRAME ) + { + L_resamp = 560; /* 6.4 kHz core -> 6 - 14 kHz SHB target. 20 ms is 560 samples in 28 kHz sample rate */ + } + else + { + L_resamp = 620; /* 8 kHz core -> 7.5 - 15.5 kHz SHB target.
20 ms is 620 samples in 31 kHz sample rate */ + } + + /* Dirty downsampling to match Nyquist to upper frequency limit of target */ + lerp_flt( st->input, new_swb_speech, L_resamp, (int16_t) ( input_Fs / 50 ) ); + + /* flip the spectrum */ + mvr2r( new_swb_speech, spchTmp, L_resamp ); + for ( i = 0; i < L_resamp; i = i + 2 ) + { + spchTmp[i] = -spchTmp[i]; + } + + /* Dirty upsampling to match Nyquist/2 to lower frequency limit of target (reversed spectrum)*/ + lerp_flt( spchTmp, spchTmp2, L_FRAME32k, L_resamp ); + mvr2r( spchTmp2, spchTmp, L_FRAME32k ); + } + else + { + /* flip the spectrum */ + mvr2r( new_swb_speech, spchTmp, L_FRAME32k ); + + for ( i = 0; i < L_FRAME32k; i = i + 2 ) + { + spchTmp[i] = -spchTmp[i]; + } + } + + Decimate_allpass_steep( spchTmp, hBWE_TD->state_ana_filt_shb, L_FRAME32k, shb_speech ); + + mvr2r( shb_speech + L_FRAME16k - ( L_LOOK_16k + L_SUBFR16k ), hBWE_TD->old_speech_shb, L_LOOK_16k + L_SUBFR16k ); + + /*Compute the past overlap for potential next iDFTs SHB*/ + if ( st->element_mode == IVAS_CPE_DFT ) + { + for ( i = 0; i < STEREO_DFT_OVL_16k; i++ ) + { + hCPE->hStereoDft->output_mem_dmx_16k_shb[i] = shb_speech[20 + i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i]; + } + } + } + + if ( st->element_mode != IVAS_CPE_DFT ) + { + /* Reset CLDFB synthesis buffer */ + set_f( st->cldfbSynTd->cldfb_state, 0.0f, st->cldfbSynTd->p_filter_length ); + } + else + { + hCPE->hStereoDft->flip_sign = -hCPE->hStereoDft->flip_sign; /* Make sure sign is updated even if DFT SHB target is not generated */ + } + } + + /* Memory reset to compensate for 0.9375 ms offset when transitioning from IO to SWB */ + /* When switching from n >1 to n = 1, we keep the enc/dec delay as 8.75/3.25 and below code not needed; + only when n = 1 start, it will be 9.6875/2.3125 in that case this reset is needed for IO->BWE.*/ + if ( st->last_extl == -1 && st->element_mode == EVS_MONO ) + { + delay = NS2SA(
input_Fs, DELAY_FIR_RESAMPL_NS ); + for ( i = 0; i < delay; i++ ) + { + shb_speech[i] = (float) i * ( 0.03f * shb_speech[2 * delay - 1 - i] ); + } + } + + return; +} +#endif void swb_pre_proc( Encoder_State *st, /* i/o: encoder state structure */ float *new_swb_speech, /* o : original input signal at 32kHz */ -- GitLab From 1c69d8b9b63d798c4af87d88aa437727fe106200 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 19 Jul 2024 14:45:39 +0530 Subject: [PATCH 2/2] Clang formatting changes --- lib_enc/ivas_core_enc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index 0d39208e7..37b321bb5 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -862,4 +862,4 @@ ivas_error ivas_core_enc( return error; } -#endif \ No newline at end of file +#endif -- GitLab