From ff64feadad387e41950254f37781ae124538ef50 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Fri, 19 Jul 2024 14:42:27 +0530
Subject: [PATCH 1/2] stereo_dft_enc_synthesize_fx implementation

---
 lib_com/fft_fx.c              |  18 +
 lib_com/ivas_prot_fx.h        |  14 +
 lib_com/prot_fx.h             |  12 +
 lib_enc/ivas_core_enc.c       | 410 ++++++++++++++++++-
 lib_enc/ivas_cpe_enc.c        | 748 +++++++++++++++++++++++++++++++++-
 lib_enc/ivas_stat_enc.h       |  21 +-
 lib_enc/ivas_stereo_dft_enc.c | 478 +++++++++++++++++++++-
 lib_enc/swb_pre_proc.c        | 548 +++++++++++++++++++++++++
 8 files changed, 2230 insertions(+), 19 deletions(-)

diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c
index d7a1aa12f..024221ca6 100644
--- a/lib_com/fft_fx.c
+++ b/lib_com/fft_fx.c
@@ -7111,6 +7111,12 @@ void rfft_fx(
             s2 = -204;
             move16();
             BREAK;
+        case 256:
+            s1 = 128;
+            move16();
+            s2 = -128;
+            move16();
+            BREAK;
         case 320:
             s1 = 102;
             move16();
@@ -7123,6 +7129,18 @@ void rfft_fx(
             s2 = -68;
             move16();
             BREAK;
+        case 640:
+            s1 = 51;
+            move16();
+            s2 = -51;
+            move16();
+            BREAK;
+        case 960:
+            s1 = 34;
+            move16();
+            s2 = -34;
+            move16();
+            BREAK;
         default:
             s1 = -1;
             move16();
diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h
index 39e8de6a0..c24ad3237 100644
--- a/lib_com/ivas_prot_fx.h
+++ b/lib_com/ivas_prot_fx.h
@@ -1123,6 +1123,20 @@ void stereo_dft_dec_fx(
     const Word16 num_md_sub_frames                /* i  : number of MD subframes             */
 );
 
+// ivas_stereo_dft_enc.c
+#ifdef IVAS_FLOAT_FIXED
+Word32 stereo_dft_enc_synthesize_fx(
+    STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle       */
+    /* output_fx takes the place of the float output buffer passed to stereo_dft_enc_synthesize() */
+    Word32 *output_fx, /* o  : output synthesis           Q16 */
+    Word16 *output_start_index, /* o  : offset relative to output_fx where the written span begins; callers convert (end - start) samples from there - TODO confirm against definition */
+    Word16 *output_end_index, /* o  : offset relative to output_fx one past the written span (callers use end - start as length) - TODO confirm against definition */
+    const Word16 chan,                 /* i  : channel number              */
+    const Word32 input_Fs,             /* i  : input sampling rate         */
+    const Word32 output_sampling_rate, /* i  : output sampling rate        */
+    const Word16 L_frame,              /* i  : frame length at internal Fs */
+    Word16 *nrg_out_fx_e /* o  : presumably exponent of the returned energy value; the callers in ivas_cpe_enc.c pass NULL - TODO confirm */ );
+#endif
 
 void ivas_ls_setup_conversion_fx(
     Decoder_Struct *st_ivas,   /* i  : IVAS decoder structure           */
diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h
index 524cf7b73..b3e016846 100644
--- a/lib_com/prot_fx.h
+++ b/lib_com/prot_fx.h
@@ -63,6 +63,7 @@
 #include "ivas_cnst.h"
 #include "stat_enc.h"
 #include "stat_dec.h"
+#include "ivas_stat_enc.h"
 #include "ivas_stat_dec.h"
 #include "ivas_error.h"
 #include "ivas_error_utils.h"
@@ -10068,4 +10069,15 @@ Word16 sr2fscale(
     const Word32 sr_core /* i  : internal sampling rate        */
 );
 
+// pre_proc functions
+/* Full fixed-point implementation pending: the interface still carries float buffers next to new_swb_speech_fx */
+void swb_pre_proc_ivas_fx(
+    Encoder_State *st,                                         /* i/o: encoder state structure                 */
+    float *new_swb_speech,                                     /* o  : original input signal at 32kHz          */
+    Word32 *new_swb_speech_fx,                                 /* o  : presumably the fixed-point counterpart of new_swb_speech; Q-format not visible here - TODO confirm */
+    float *shb_speech,                                         /* o  : SHB target signal (6-14kHz) at 16kHz    */
+    float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i  : real buffer                             */
+    float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i  : imag buffer                             */
+    CPE_ENC_HANDLE hCPE                                        /* i/o: CPE encoder structure                   */
+);
 #endif
diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c
index ebc6bd649..0d39208e7 100644
--- a/lib_enc/ivas_core_enc.c
+++ b/lib_enc/ivas_core_enc.c
@@ -40,6 +40,7 @@
 #include "wmc_auto.h"
 #include <math.h>
 #ifdef IVAS_FLOAT_FIXED
+#include "prot_fx.h"
 #include "ivas_prot_fx.h"
 #endif
 
@@ -49,7 +50,7 @@
  *
  * Principal IVAS core coder routine, where number of core channels is 1 or 2
  *-------------------------------------------------------------------*/
-
+#ifdef IVAS_FLOAT_FIXED
 ivas_error ivas_core_enc(
     SCE_ENC_HANDLE hSCE,                                         /* i/o: SCE encoder structure                   */
     CPE_ENC_HANDLE hCPE,                                         /* i/o: CPE encoder structure                   */
@@ -90,6 +91,10 @@ ivas_error ivas_core_enc(
     float hb_speech[L_FRAME16k / 4];
     float *new_swb_speech;
     float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX];
+#ifdef IVAS_FLOAT_FIXED
+    Word32 *new_swb_speech_fx;
+    Word32 new_swb_speech_buffer_fx[L_FRAME48k + STEREO_DFT_OVL_MAX];
+#endif
     float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET];
     float voice_factors[CPE_CHANNELS][NB_SUBFR16k];
     int16_t Voicing_flag[CPE_CHANNELS];
@@ -321,6 +326,408 @@ ivas_error ivas_core_enc(
     }
 
 
+    /*---------------------------------------------------------------------*
+     * Postprocessing, BWEs and Updates
+     *---------------------------------------------------------------------*/
+
+    for ( n = 0; n < n_CoreChannels; n++ )
+    {
+        st = sts[n];
+
+        /*---------------------------------------------------------------------*
+         * Postprocessing for ACELP/HQ core switching
+         *---------------------------------------------------------------------*/
+
+        core_switching_post_enc( st, old_inp_12k8[n], old_inp_16k[n], A[n] );
+
+        /*---------------------------------------------------------------------*
+         * WB TBE encoding
+         * WB BWE encoding
+         *---------------------------------------------------------------------*/
+
+        if ( input_Fs >= 16000 && st->bwidth < SWB && st->hBWE_TD != NULL )
+        {
+            /* Common pre-processing for WB TBE and WB BWE */
+            wb_pre_proc( st, last_element_mode, new_inp_resamp16k[n], hb_speech );
+        }
+
+        if ( st->extl == WB_TBE )
+        {
+            /* WB TBE encoder */
+            wb_tbe_enc( st, hb_speech, bwe_exc_extended[n], voice_factors[n], pitch_buf[n] );
+        }
+        else if ( st->extl == WB_BWE && n == 0 && st->element_mode != IVAS_CPE_MDCT )
+        {
+            /* WB BWE encoder */
+            wb_bwe_enc( st, new_inp_resamp16k[n] );
+        }
+
+        /*---------------------------------------------------------------------*
+         * SWB(FB) TBE encoding
+         * SWB(FB) BWE encoding
+         *---------------------------------------------------------------------*/
+
+        new_swb_speech = new_swb_speech_buffer + STEREO_DFT_OVL_MAX;
+#ifdef IVAS_FLOAT_FIXED
+        new_swb_speech_fx = new_swb_speech_buffer_fx + STEREO_DFT_OVL_MAX;
+#endif
+
+        if ( !st->Opt_SC_VBR && input_Fs >= 32000 && st->hBWE_TD != NULL )
+        {
+            /* Common pre-processing for SWB(FB) TBE and SWB(FB) BWE */
+#ifdef IVAS_FLOAT_FIXED
+            swb_pre_proc_ivas_fx( st, new_swb_speech, new_swb_speech_fx, shb_speech, realBuffer[n], imagBuffer[n], hCPE );
+#else
+            swb_pre_proc( st, new_swb_speech, shb_speech, realBuffer[n], imagBuffer[n], hCPE );
+#endif
+        }
+        else if ( input_Fs >= 32000 )
+        {
+            if ( st->hBWE_TD != NULL )
+            {
+                InitSWBencBufferStates( st->hBWE_TD, shb_speech );
+            }
+        }
+
+        /* SWB TBE encoder */
+        if ( st->extl == SWB_TBE || st->extl == FB_TBE )
+        {
+            if ( st->core_brate != FRAME_NO_DATA && st->core_brate != SID_2k40 )
+            {
+                float fb_exc[L_FRAME16k];
+
+                swb_tbe_enc( st, hStereoICBWE, shb_speech, bwe_exc_extended[n], voice_factors[n], fb_exc, pitch_buf[n] );
+
+                if ( st->extl == FB_TBE )
+                {
+                    /* FB TBE encoder */
+                    fb_tbe_enc( st, st->input, fb_exc );
+                }
+            }
+        }
+        else if ( st->extl == SWB_BWE || st->extl == FB_BWE )
+        {
+            /* SWB(FB) BWE encoder */
+            swb_bwe_enc( st, last_element_mode, old_inp_12k8[n], old_inp_16k[n], old_syn_12k8_16k[n], new_swb_speech, shb_speech );
+        }
+
+        /*---------------------------------------------------------------------*
+         * SWB DTX/CNG encoding
+         *---------------------------------------------------------------------*/
+
+        if ( st->hTdCngEnc != NULL && st->Opt_DTX_ON && ( input_frame >= L_FRAME32k || st->element_mode == IVAS_CPE_DFT ) )
+        {
+            /* SHB DTX/CNG encoder */
+            swb_CNG_enc( st, shb_speech, old_syn_12k8_16k[n] );
+        }
+
+        /*-------------------------------------------------------------------*
+         *  Inter-channel BWE encoding
+         *-------------------------------------------------------------------*/
+
+        if ( n == 0 && input_Fs >= 32000 && hStereoICBWE != NULL )
+        {
+            stereo_icBWE_preproc( hCPE, input_frame, new_swb_speech_buffer /*tmp buffer*/ );
+
+            stereo_icBWE_enc( hCPE, shb_speech, new_swb_speech_buffer, voice_factors[0] );
+        }
+
+        /*---------------------------------------------------------------------*
+         * Channel-aware mode - write signaling information into the bitstream
+         *---------------------------------------------------------------------*/
+
+        signaling_enc_rf( st );
+
+        /*---------------------------------------------------------------------*
+         * Common updates
+         *---------------------------------------------------------------------*/
+
+        if ( !MCT_flag ) /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */
+        {
+            updt_enc_common( st );
+        }
+    }
+
+    /*------------------------------------------------------------------*
+     * Write potentially unused bits in combined format coding
+     *-----------------------------------------------------------------*/
+
+    if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 )
+    {
+        while ( diff_nBits > 0 )
+        {
+            n = min( diff_nBits, 16 );
+            push_indice( sts[0]->hBstr, IND_UNUSED, 0, n );
+            diff_nBits -= n;
+        }
+    }
+
+
+    pop_wmops();
+
+    return error;
+}
+#else
+ivas_error ivas_core_enc(
+    SCE_ENC_HANDLE hSCE,                                         /* i/o: SCE encoder structure                   */
+    CPE_ENC_HANDLE hCPE,                                         /* i/o: CPE encoder structure                   */
+    MCT_ENC_HANDLE hMCT,                                         /* i/o: MCT encoder structure                   */
+    const int16_t n_CoreChannels,                                /* i  : number of core channels to be coded     */
+    float old_inp_12k8[][L_INP_12k8],                            /* i  : buffer of old input signal              */
+    float old_inp_16k[][L_INP],                                  /* i  : buffer of old input signal              */
+    float ener[],                                                /* i  : residual energy from Levinson-Durbin    */
+    float A[][NB_SUBFR16k * ( M + 1 )],                          /* i  : A(z) unquantized for the 4 subframes    */
+    float Aw[][NB_SUBFR16k * ( M + 1 )],                         /* i  : weighted A(z) unquantized for subframes */
+    float epsP[][M + 1],                                         /* i  : LP prediction errors                    */
+    float lsp_new[][M],                                          /* i  : LSPs at the end of the frame            */
+    float lsp_mid[][M],                                          /* i  : LSPs in the middle of the frame         */
+    const int16_t vad_hover_flag[],                              /* i  : VAD hangover flag                       */
+    int16_t attack_flag[],                                       /* i  : attack flag (GSC or TC)                 */
+    float realBuffer[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer                             */
+    float imagBuffer[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer                             */
+    float old_wsp[][L_WSP],                                      /* i  : weighted input signal buffer            */
+    const int16_t loc_harm[],                                    /* i  : harmonicity flag                        */
+    const float cor_map_sum[],                                   /* i  : speech/music classif. parameter         */
+    const int16_t vad_flag_dtx[],                                /* i  : HE-SAD flag with additional DTX HO      */
+    float enerBuffer[][CLDFB_NO_CHANNELS_MAX],                   /* i  : energy buffer                           */
+    float fft_buff[][2 * L_FFT],                                 /* i  : FFT buffer                              */
+    const int16_t tdm_SM_or_LRTD_Pri,                            /* i  : channel combination scheme flag         */
+    const int16_t ivas_format,                                   /* i  : IVAS format                             */
+    const int16_t flag_16k_smc                                   /* i  : flag to indicate if the OL SMC is run at 16 kHz */
+)
+{
+    int16_t n, input_frame;
+    int16_t cpe_id, MCT_flag;
+    Encoder_State **sts, *st;
+    STEREO_ICBWE_ENC_HANDLE hStereoICBWE;
+    STEREO_TD_ENC_DATA_HANDLE hStereoTD;
+    float *inp[CPE_CHANNELS];
+    float new_inp_resamp16k[CPE_CHANNELS][L_FRAME16k]; /* new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
+    float old_syn_12k8_16k[CPE_CHANNELS][L_FRAME16k];  /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE */
+    float shb_speech[L_FRAME16k];
+    float hb_speech[L_FRAME16k / 4];
+    float *new_swb_speech;
+    float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX];
+    float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET];
+    float voice_factors[CPE_CHANNELS][NB_SUBFR16k];
+    int16_t Voicing_flag[CPE_CHANNELS];
+    float pitch_buf[CPE_CHANNELS][NB_SUBFR16k];
+    int16_t unbits[CPE_CHANNELS];
+    float tdm_lspQ_PCh[M], tdm_lsfQ_PCh[M];
+    int16_t last_element_mode, tdm_Pitch_reuse_flag;
+    int32_t element_brate, last_element_brate, input_Fs;
+    int16_t diff_nBits;
+    ivas_error error;
+    int16_t max_num_indices_BWE;
+
+    push_wmops( "ivas_core_enc" );
+
+    error = IVAS_ERR_OK;
+
+    /*------------------------------------------------------------------*
+     * General initialization
+     *-----------------------------------------------------------------*/
+
+    if ( hSCE != NULL )
+    {
+        cpe_id = -1;
+        MCT_flag = 0;
+        sts = hSCE->hCoreCoder;
+        hStereoTD = NULL;
+        hStereoICBWE = NULL;
+        element_brate = hSCE->element_brate;
+        last_element_brate = hSCE->last_element_brate;
+        last_element_mode = IVAS_SCE;
+        tdm_Pitch_reuse_flag = -1;
+    }
+    else
+    {
+        cpe_id = hCPE->cpe_id;
+        MCT_flag = 0;
+        if ( hMCT != NULL )
+        {
+            MCT_flag = 1;
+        }
+        sts = hCPE->hCoreCoder;
+        hStereoICBWE = hCPE->hStereoICBWE;
+        element_brate = hCPE->element_brate;
+        last_element_brate = hCPE->last_element_brate;
+        last_element_mode = hCPE->last_element_mode;
+
+        if ( hCPE->hStereoTD != NULL )
+        {
+            hStereoTD = hCPE->hStereoTD;
+            tdm_Pitch_reuse_flag = hCPE->hStereoTD->tdm_Pitch_reuse_flag;
+        }
+        else
+        {
+            hStereoTD = NULL;
+            tdm_Pitch_reuse_flag = -1;
+        }
+    }
+
+    input_Fs = sts[0]->input_Fs;
+    input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC );
+
+    set_f( new_swb_speech_buffer, 0, L_FRAME48k + STEREO_DFT_OVL_MAX );
+
+    for ( n = 0; n < n_CoreChannels; n++ )
+    {
+        st = sts[n];
+
+        /*------------------------------------------------------------------*
+         * Initialization per core-coder channel
+         *-----------------------------------------------------------------*/
+
+
+        st->extl = -1;
+        unbits[n] = 0;
+
+        st->element_brate = element_brate;
+
+        /*---------------------------------------------------------------------*
+         * Pre-processing, incl. Decision matrix
+         *---------------------------------------------------------------------*/
+
+        if ( ( error = pre_proc_ivas( st, last_element_mode, element_brate, ivas_format == SBA_FORMAT ? last_element_brate : element_brate, input_frame, old_inp_12k8[n], old_inp_16k[n], &inp[n], &ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], new_inp_resamp16k[n], &Voicing_flag[n], old_wsp[n], loc_harm[n], cor_map_sum[n], vad_flag_dtx[n], enerBuffer[n], fft_buff[n], MCT_flag, vad_hover_flag[n], flag_16k_smc ) ) != IVAS_ERR_OK )
+        {
+            return error;
+        }
+
+        if ( st->element_mode == IVAS_CPE_MDCT || st->element_mode == IVAS_SCE )
+        {
+            st->enablePlcWaveadjust = 0;
+        }
+    }
+
+    /*------------------------------------------------------------------*
+     * Sanity check in combined format coding
+     *-----------------------------------------------------------------*/
+
+    diff_nBits = 0;
+    if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 )
+    {
+        ivas_combined_format_brate_sanity( hCPE->element_brate, sts[0]->core, sts[0]->total_brate, &( sts[0]->core_brate ), &( sts[0]->inactive_coder_type_flag ), &diff_nBits );
+    }
+
+    /*---------------------------------------------------------------------*
+     * Core Encoding
+     *---------------------------------------------------------------------*/
+
+    for ( n = 0; n < n_CoreChannels; n++ )
+    {
+        st = sts[n];
+
+        /* update pointer to the buffer of indices of the second channel */
+        if ( n == 1 && st->element_mode == IVAS_CPE_TD )
+        {
+            /* adjust the pointer to the buffer of indices of the secondary channel (make space for BWE indices) */
+            max_num_indices_BWE = get_BWE_max_num_indices( sts[0]->extl_brate );
+            st->hBstr->ind_list = sts[0]->hBstr->ind_list + sts[0]->hBstr->nb_ind_tot + max_num_indices_BWE;
+
+            /* write TD stereo spatial parameters */
+            move_indices( hStereoTD->tdm_hBstr_tmp.ind_list, st->hBstr->ind_list, hStereoTD->tdm_hBstr_tmp.nb_ind_tot );
+            st->hBstr->nb_ind_tot += hStereoTD->tdm_hBstr_tmp.nb_ind_tot;
+            st->hBstr->nb_bits_tot += hStereoTD->tdm_hBstr_tmp.nb_bits_tot;
+
+            reset_indices_enc( &hStereoTD->tdm_hBstr_tmp, MAX_IND_TDM_TMP );
+        }
+
+        /*---------------------------------------------------------------------*
+         * Write signaling info into the bitstream
+         *---------------------------------------------------------------------*/
+
+        if ( !MCT_flag || ( MCT_flag && cpe_id == 0 ) )
+        {
+            ivas_signaling_enc( st, MCT_flag, element_brate, tdm_SM_or_LRTD_Pri, tdm_Pitch_reuse_flag );
+        }
+
+        /*---------------------------------------------------------------------*
+         * Preprocessing (preparing) for ACELP/HQ core switching
+         *---------------------------------------------------------------------*/
+
+        core_switching_pre_enc( st, old_inp_12k8[n], old_inp_16k[n], sts[0]->active_cnt, last_element_mode );
+
+        /*---------------------------------------------------------------------*
+         * ACELP core encoding
+         * TCX core encoding
+         * HQ core encoding
+         *---------------------------------------------------------------------*/
+
+        if ( st->core == ACELP_CORE )
+        {
+            /* ACELP core encoder */
+            if ( ( error = acelp_core_enc( st, inp[n], ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], vad_hover_flag[0], attack_flag[n], bwe_exc_extended[n], voice_factors[n], old_syn_12k8_16k[n], pitch_buf[n], &unbits[n], hStereoTD, tdm_lsfQ_PCh ) ) != IVAS_ERR_OK )
+            {
+                return error;
+            }
+        }
+
+        if ( ( st->core == TCX_20_CORE || st->core == TCX_10_CORE ) && st->element_mode != IVAS_CPE_MDCT )
+        {
+            /* TCX core encoder */
+            stereo_tcx_core_enc( st, old_inp_12k8[n] + L_INP_MEM, old_inp_16k[n] + L_INP_MEM, Aw[n], lsp_new[n], lsp_mid[n], pitch_buf[n], last_element_mode, vad_hover_flag[0] );
+        }
+
+        if ( st->core == HQ_CORE )
+        {
+            /* HQ core encoder */
+            hq_core_enc( st, st->input, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] );
+        }
+
+        /*---------------------------------------------------------------------*
+         * TD stereo updates
+         *---------------------------------------------------------------------*/
+
+        if ( st->element_mode == IVAS_CPE_TD && n == 0 )
+        {
+            td_stereo_param_updt( st->lsp_old, st->lsf_old, pitch_buf[0], tdm_lspQ_PCh, tdm_lsfQ_PCh, hStereoTD->tdm_Pri_pitch_buf, st->flag_ACELP16k, hStereoTD->tdm_use_IAWB_Ave_lpc );
+        }
+    }
+
+
+    /*---------------------------------------------------------------------*
+     * MDCT stereo: joint TCX Core Encoding
+     *---------------------------------------------------------------------*/
+
+    if ( sts[0]->element_mode == IVAS_CPE_MDCT )
+    {
+        if ( sts[0]->core_brate > SID_2k40 && sts[1]->core_brate > SID_2k40 )
+        {
+            if ( MCT_flag )
+            {
+                ivas_mdct_core_whitening_enc( hCPE, old_inp_16k, old_wsp, pitch_buf, hMCT->p_mdst_spectrum_long[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long[cpe_id],
+                                              hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE );
+            }
+            else
+            {
+                stereo_mdct_core_enc( hCPE, old_inp_16k, old_wsp, pitch_buf );
+            }
+        }
+        else if ( sts[0]->core_brate == SID_2k40 && sts[1]->core_brate == SID_2k40 )
+        {
+            /* synch CNG configs between channels */
+            for ( n = 0; n < CPE_CHANNELS; n++ )
+            {
+                st = sts[n];
+                if ( ( st->hFdCngEnc->hFdCngCom->frameSize != st->L_frame ) || ( st->hFdCngEnc->hFdCngCom->CngBandwidth != st->bwidth ) )
+                {
+                    configureFdCngEnc( st->hFdCngEnc, max( st->bwidth, WB ), st->L_frame == L_FRAME16k ? ACELP_16k40 : ACELP_9k60 );
+                }
+            }
+
+            if ( sts[0]->cng_sba_flag )
+            {
+                FdCngEncodeDiracMDCTStereoSID( hCPE );
+            }
+            else
+            {
+                FdCngEncodeMDCTStereoSID( hCPE );
+            }
+        }
+    }
+
+
     /*---------------------------------------------------------------------*
      * Postprocessing, BWEs and Updates
      *---------------------------------------------------------------------*/
@@ -455,3 +862,4 @@ ivas_error ivas_core_enc(
 
     return error;
 }
+#endif
diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c
index da80d795c..9b8b4bfc5 100644
--- a/lib_enc/ivas_cpe_enc.c
+++ b/lib_enc/ivas_cpe_enc.c
@@ -59,7 +59,7 @@ static void stereo_mode_combined_format_enc( const Encoder_Struct *st_ivas, CPE_
  *
  * Channel Pair Element (CPE) encoding routine
  *-------------------------------------------------------------------*/
-
+#ifdef IVAS_FLOAT_FIXED
 ivas_error ivas_cpe_enc(
     Encoder_Struct *st_ivas,       /* i/o: IVAS encoder structure          */
     const int16_t cpe_id,          /* i  : CPE # identifier                */
@@ -72,8 +72,14 @@ ivas_error ivas_cpe_enc(
     CPE_ENC_HANDLE hCPE;
     Encoder_State **sts;
     int16_t n, n_CoreChannels;
-    float old_inp_12k8[CPE_CHANNELS][L_INP_12k8];                            /* buffer of input signal @ 12k8            */
-    float old_inp_16k[CPE_CHANNELS][L_INP];                                  /* buffer of input signal @ 16kHz           */
+    float old_inp_12k8[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8            */
+#ifdef IVAS_FLOAT_FIXED
+    Word32 old_inp_12k8_fx[CPE_CHANNELS][L_INP_12k8]; /* buffer of input signal @ 12k8            */
+#endif
+    float old_inp_16k[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz           */
+#ifdef IVAS_FLOAT_FIXED
+    Word32 old_inp_16k_fx[CPE_CHANNELS][L_INP]; /* buffer of input signal @ 16kHz           */
+#endif
     float ener[CPE_CHANNELS];                                                /* residual energy from Levinson-Durbin     */
     float relE[CPE_CHANNELS];                                                /* frame relative energy                    */
     float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )];                          /* A(z) unquantized for subframes           */
@@ -455,6 +461,83 @@ ivas_error ivas_cpe_enc(
         internal_Fs = getTcxonly_ivas( IVAS_CPE_MDCT, sts[0]->bits_frame_nominal * FRAMES_PER_SEC, 0, sts[0]->is_ism_format ) == 0 ? INT_FS_16k : max( INT_FS_16k, sts[0]->sr_core );
 
         /* iDFT at input sampling rate */
+#ifdef IVAS_FLOAT_FIXED
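+        /* flt2fix: convert the float DFT spectra, energies and overlap memories to fixed point before the _fx synthesis calls */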
+        f2me_buf( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], &hCPE->hStereoDft->DFT_fx_e[0], STEREO_DFT_N_MAX_ENC );
+        f2me_buf( hCPE->hStereoDft->DFT[1], hCPE->hStereoDft->DFT_fx[1], &hCPE->hStereoDft->DFT_fx_e[1], STEREO_DFT_N_MAX_ENC );
+        if ( hCPE->element_mode == IVAS_CPE_DFT && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] )
+        {
+            floatToFixed_arr( sts[1]->old_inp_12k8, sts[1]->old_inp_12k8_fx, 0, L_INP_MEM );
+        }
+        f2me( hCPE->hStereoDft->icbweRefEner, &hCPE->hStereoDft->icbweRefEner_fx, &hCPE->hStereoDft->icbweRefEner_fx_e );
+        f2me( hCPE->hStereoDft->lbEner, &hCPE->hStereoDft->lbEner_fx, &hCPE->hStereoDft->lbEner_fx_e );
+        floatToFixed_arrL( hCPE->hStereoDft->output_mem_res_8k, hCPE->hStereoDft->output_mem_res_8k_fx, 16, STEREO_DFT_OVL_8k );
+        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx, hCPE->hStereoDft->output_mem_dmx_fx, 16, STEREO_DFT_OVL_MAX );
+        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_12k8, hCPE->hStereoDft->output_mem_dmx_12k8_fx, 16, STEREO_DFT_OVL_12k8 );
+        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k, hCPE->hStereoDft->output_mem_dmx_16k_fx, 16, STEREO_DFT_OVL_16k );
+        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb, hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, 16, STEREO_DFT_OVL_16k );
+        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_32k, hCPE->hStereoDft->output_mem_dmx_32k_fx, 16, STEREO_DFT_OVL_32k );
+        /*flt2fix end*/
+
+        // stereo_dft_enc_synthesize( hCPE->hStereoDft, sts[0]->input, 0, input_Fs, input_Fs, 0 );
+        Word16 out_start_ind, out_end_ind;
+        Word16 out_12k8_start_ind[CPE_CHANNELS], out_12k8_end_ind[CPE_CHANNELS];
+        Word16 out_16k_start_ind = 0, out_16k_end_ind = 0;
+        move16();
+        move16();
+        stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, sts[0]->input32_fx, &out_start_ind, &out_end_ind, 0, input_Fs, input_Fs, 0, NULL );
+
+        /* iDFT & resampling to 12.8kHz internal sampling rate */
+        // stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_12k8[0] + L_INP_MEM, 0, input_Fs, INT_FS_12k8, 0 );
+        stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, old_inp_12k8_fx[0] + L_INP_MEM, &out_12k8_start_ind[0], &out_12k8_end_ind[0], 0, input_Fs, INT_FS_12k8, 0, NULL );
+
+        /* iDFT & resampling to 16kHz internal sampling rate for M channel */
+        IF( EQ_32( input_Fs, internal_Fs ) )
+        {
+            // mvr2r( sts[0]->input - STEREO_DFT_OVL_16k, old_inp_16k[0] + L_INP_MEM - STEREO_DFT_OVL_16k, input_frame + STEREO_DFT_OVL_16k );
+            Copy32( sts[0]->input32_fx - STEREO_DFT_OVL_16k, old_inp_16k_fx[0] + L_INP_MEM - STEREO_DFT_OVL_16k, input_frame + STEREO_DFT_OVL_16k );
+            out_16k_start_ind = -STEREO_DFT_OVL_16k;
+            out_16k_end_ind = out_16k_start_ind + input_frame + STEREO_DFT_OVL_16k;
+        }
+        ELSE
+        {
+            // stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_16k[0] + L_INP_MEM, 0, input_Fs, internal_Fs, 0 );
+            stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, old_inp_16k_fx[0] + L_INP_MEM, &out_16k_start_ind, &out_16k_end_ind, 0, input_Fs, internal_Fs, 0, NULL );
+        }
+
+        /* DFT Stereo: iDFT of residual signal at 8kHz sampling rate */
+        test();
+        IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] )
+        {
+            // mvr2r( sts[1]->old_inp_12k8, old_inp_12k8[1], L_INP_MEM );
+            Copy_Scale_sig_16_32_no_sat( sts[1]->old_inp_12k8_fx, old_inp_12k8_fx[1], L_INP_MEM, 16 - 0 );
+            // stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_12k8[1] + L_INP_MEM, 1, input_Fs, 8000, 0 );
+            stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, old_inp_12k8_fx[1] + L_INP_MEM, &out_12k8_start_ind[1], &out_12k8_end_ind[1], 1, input_Fs, 8000, 0, NULL );
+
+            /* update old input signal buffer */
+            // mvr2r( old_inp_12k8[1] + L_FRAME8k, sts[1]->old_inp_12k8, L_INP_MEM );
+            Copy_Scale_sig_32_16( old_inp_12k8_fx[1] + L_FRAME8k, sts[1]->old_inp_12k8_fx, L_INP_MEM, 0 - 16 );
+        }
+
+        /* fix2flt: convert the fixed-point synthesis outputs, energies and overlap memories back to float */
+        hCPE->hStereoDft->icbweRefEner = me2f( hCPE->hStereoDft->icbweRefEner_fx, hCPE->hStereoDft->icbweRefEner_fx_e );
+        hCPE->hStereoDft->lbEner = me2f( hCPE->hStereoDft->lbEner_fx, hCPE->hStereoDft->lbEner_fx_e );
+        fixedToFloat_arrL( sts[0]->input32_fx + out_start_ind, sts[0]->input + out_start_ind, 16, out_end_ind - out_start_ind );
+        fixedToFloat_arrL( old_inp_12k8_fx[0] + L_INP_MEM + out_12k8_start_ind[0], old_inp_12k8[0] + L_INP_MEM + out_12k8_start_ind[0], 16, out_12k8_end_ind[0] - out_12k8_start_ind[0] );
+        fixedToFloat_arrL( old_inp_16k_fx[0] + L_INP_MEM + out_16k_start_ind, old_inp_16k[0] + L_INP_MEM + out_16k_start_ind, 16, out_16k_end_ind - out_16k_start_ind );
+        if ( hCPE->element_mode == IVAS_CPE_DFT && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] )
+        {
+            fixedToFloat_arr( sts[1]->old_inp_12k8_fx, sts[1]->old_inp_12k8, 0, L_INP_MEM );
+            fixedToFloat_arrL( old_inp_12k8_fx[1] + L_INP_MEM + out_12k8_start_ind[1], old_inp_12k8[1] + L_INP_MEM + out_12k8_start_ind[1], 16, out_12k8_end_ind[1] - out_12k8_start_ind[1] );
+        }
+        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_res_8k_fx, hCPE->hStereoDft->output_mem_res_8k, 16, STEREO_DFT_OVL_8k );
+        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_fx, hCPE->hStereoDft->output_mem_dmx, 16, STEREO_DFT_OVL_MAX );
+        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_12k8_fx, hCPE->hStereoDft->output_mem_dmx_12k8, 16, STEREO_DFT_OVL_12k8 );
+        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_fx, hCPE->hStereoDft->output_mem_dmx_16k, 16, STEREO_DFT_OVL_16k );
+        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, hCPE->hStereoDft->output_mem_dmx_16k_shb, 16, STEREO_DFT_OVL_16k );
+        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_32k_fx, hCPE->hStereoDft->output_mem_dmx_32k, 16, STEREO_DFT_OVL_32k );
+        /*fix2flt end*/
+#else
         stereo_dft_enc_synthesize( hCPE->hStereoDft, sts[0]->input, 0, input_Fs, input_Fs, 0 );
 
         /* iDFT & resampling to 12.8kHz internal sampling rate */
@@ -479,7 +562,7 @@ ivas_error ivas_cpe_enc(
             /* update old input signal buffer */
             mvr2r( old_inp_12k8[1] + L_FRAME8k, sts[1]->old_inp_12k8, L_INP_MEM );
         }
-
+#endif
         /* no iDFT at input sampling rate for Side channel -> reset the buffer */
         set_zero( sts[1]->input, input_frame );
     }
@@ -783,7 +866,664 @@ ivas_error ivas_cpe_enc(
     pop_wmops();
     return error;
 }
+#else
+ivas_error ivas_cpe_enc(
+    Encoder_Struct *st_ivas,       /* i/o: IVAS encoder structure          */
+    const int16_t cpe_id,          /* i  : CPE # identifier                */
+    float data_f_ch0[],            /* i  : input signal for channel 0      */
+    float data_f_ch1[],            /* i  : input signal for channel 1      */
+    const int16_t input_frame,     /* i  : input frame length per channel  */
+    const int16_t nb_bits_metadata /* i  : number of metadata bits         */
+) /* returns IVAS_ERR_OK, or the error code of the first called stage that failed */
+{
+    CPE_ENC_HANDLE hCPE;
+    Encoder_State **sts;
+    int16_t n, n_CoreChannels;
+    float old_inp_12k8[CPE_CHANNELS][L_INP_12k8];                            /* buffer of input signal @ 12k8            */
+    float old_inp_16k[CPE_CHANNELS][L_INP];                                  /* buffer of input signal @ 16kHz           */
+    float ener[CPE_CHANNELS];                                                /* residual energy from Levinson-Durbin     */
+    float relE[CPE_CHANNELS];                                                /* frame relative energy                    */
+    float A[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )];                          /* A(z) unquantized for subframes           */
+    float Aw[CPE_CHANNELS][NB_SUBFR16k * ( M + 1 )];                         /* weighted A(z) unquantized for subframes  */
+    float epsP[CPE_CHANNELS][M + 1];                                         /* LP prediction errors                     */
+    float lsp_new[CPE_CHANNELS][M];                                          /* LSPs at the end of the frame             */
+    float lsp_mid[CPE_CHANNELS][M];                                          /* ISPs in the middle of the frame          */
+    int16_t vad_hover_flag[CPE_CHANNELS];                                    /* VAD hangover flag                        */
+    int16_t attack_flag[CPE_CHANNELS];                                       /* attack flag (GSC or TC)                  */
+    float realBuffer[CPE_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; /* real buffer                              */
+    float imagBuffer[CPE_CHANNELS][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; /* imag buffer                              */
+    float old_wsp[CPE_CHANNELS][L_WSP];                                      /* old weighted input signal                */
+    float pitch_fr[CPE_CHANNELS][NB_SUBFR];                                  /* fractional pitch values                  */
+    float voicing_fr[CPE_CHANNELS][NB_SUBFR];                                /* fractional pitch gains                   */
+    int16_t loc_harm[CPE_CHANNELS];                                          /* harmonicity flag                         */
+    float cor_map_sum[CPE_CHANNELS];                                         /* speech/music classif. parameter          */
+    int16_t vad_flag_dtx[CPE_CHANNELS];                                      /* HE-SAD flag with additional DTX HO       */
+    float enerBuffer[CPE_CHANNELS][CLDFB_NO_CHANNELS_MAX];                   /* energy buffer                            */
+    float currFlatness[CPE_CHANNELS];                                        /* flatness parameter                       */
+    float fft_buff[CPE_CHANNELS][2 * L_FFT];                                 /* FFT buffer                               */
+    int16_t tdm_ratio_idx, tdm_ratio_idx_SM;                                 /* temp. TD stereo parameters               */
+    int16_t tdm_SM_or_LRTD_Pri;                                              /* temp. TD stereo parameters               */
+    float tdm_last_ratio;                                                    /* temp. TD stereo parameters               */
+    int16_t nb_bits;                                                         /* number of DFT stereo side bits           */
+    float fr_bands[CPE_CHANNELS][2 * NB_BANDS];                              /* energy in frequency bands                 */
+    float Etot_LR[CPE_CHANNELS];                                             /* total energy                              */
+    float lf_E[CPE_CHANNELS][2 * VOIC_BINS];                                 /* per bin spectrum energy in lf             */
+    int16_t localVAD_HE_SAD[CPE_CHANNELS];                                   /* HE-SAD flag without hangover, LR channels */
+    float band_energies_LR[2 * NB_BANDS];                                    /* energy in critical bands without minimum noise floor E_MIN */
+    float orig_input[CPE_CHANNELS][L_FRAME48k]; /* copy of sts[n]->input taken before the stereo processing; exposed to IC-BWE and used for the MDCT-stereo input buffer update */
+    float Etot_last[CPE_CHANNELS]; /* hNoiseEst->Etot_last of both channels; only filled in the TD stereo branch, consumed by tdm_configure_enc() */
+    int32_t tmp, input_Fs; /* tmp: written by reset_metadata_spatial(), never read in this function */
+    int16_t max_bwidth, ivas_format;
+    ENCODER_CONFIG_HANDLE hEncoderConfig;
+    int32_t ivas_total_brate;
+    ivas_error error;
+    int32_t cpe_brate;
+    int32_t element_brate_ref;
+    int16_t last_bits_frame_nominal; /* last_bits_frame_nominal for M or PCh channel */
+
+    error = IVAS_ERR_OK;
+    push_wmops( "ivas_cpe_enc" );
+
+    hCPE = st_ivas->hCPE[cpe_id];
+    sts = hCPE->hCoreCoder;
+    hEncoderConfig = st_ivas->hEncoderConfig;
+    max_bwidth = hEncoderConfig->max_bwidth;
+    ivas_format = hEncoderConfig->ivas_format;
+    input_Fs = hEncoderConfig->input_Fs;
+    ivas_total_brate = hEncoderConfig->ivas_total_brate;
+    element_brate_ref = hCPE->element_brate;
+    last_bits_frame_nominal = sts[0]->bits_frame_nominal;
+
+    /*------------------------------------------------------------------*
+     * Initialization - general
+     *-----------------------------------------------------------------*/
+
+    tdm_SM_or_LRTD_Pri = 0;
+    tdm_ratio_idx = -1;
+    tdm_ratio_idx_SM = -1;
+    tdm_last_ratio = 0;
+
+
+    /*------------------------------------------------------------------*
+     * CPE initialization - core coder
+     *-----------------------------------------------------------------*/
+
+    for ( n = 0; n < CPE_CHANNELS; n++ )
+    {
+        sts[n]->idchan = n;
+        sts[n]->core = -1;
+        sts[n]->core_brate = -1; /* updated in dtx() */
+        sts[n]->max_bwidth = max_bwidth;
+        if ( st_ivas->hMCT == NULL ) /*already updated before CPE call*/
+        {
+            sts[n]->input_bwidth = sts[n]->last_input_bwidth; /* updated in BWD */
+            sts[n]->bwidth = sts[n]->last_bwidth;             /* updated in BWD */
+        }
+        sts[n]->rate_switching_reset = 0;
+    }
 
+    mvr2r( data_f_ch0, sts[0]->input, input_frame );
+    if ( data_f_ch1 != NULL ) /*this may happen for cases with odd number of channels*/
+    {
+        mvr2r( data_f_ch1, sts[1]->input, input_frame );
+    }
+
+    /*----------------------------------------------------------------*
+     * Stereo technology selection
+     * Front-VAD on input L and R channels
+     *----------------------------------------------------------------*/
+
+    if ( sts[0]->ini_frame > 0 && st_ivas->hMCT == NULL )
+    {
+        hCPE->element_mode = select_stereo_mode( hCPE, ivas_format );
+    }
+
+    stereo_mode_combined_format_enc( st_ivas, hCPE );
+
+    if ( ( error = front_vad( hCPE, NULL, hEncoderConfig, &hCPE->hFrontVad[0], st_ivas->hMCT != NULL, input_frame, vad_flag_dtx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, vad_hover_flag, band_energies_LR, NULL, NULL ) ) != IVAS_ERR_OK )
+    {
+        return error;
+    }
+
+    sts[0]->element_mode = hCPE->element_mode;
+    sts[1]->element_mode = hCPE->element_mode;
+
+    n_CoreChannels = 2;
+    if ( hCPE->element_mode == IVAS_CPE_DFT )
+    {
+        n_CoreChannels = 1; /* in DFT stereo, only M channel is coded */
+
+        sts[1]->vad_flag = 0;
+    }
+
+    /*----------------------------------------------------------------*
+     * dynamically allocate data structures depending on the actual stereo mode
+     *----------------------------------------------------------------*/
+
+    if ( ( error = stereo_memory_enc( hCPE, input_Fs, max_bwidth, &tdm_last_ratio, ivas_format, st_ivas->nchan_transport ) ) != IVAS_ERR_OK )
+    {
+        return error;
+    }
+
+
+    /*----------------------------------------------------------------*
+     * Set TD stereo parameters
+     *----------------------------------------------------------------*/
+
+    if ( ( error = stereo_set_tdm( hCPE, input_frame ) ) != IVAS_ERR_OK )
+    {
+        return error;
+    }
+
+    /*----------------------------------------------------------------*
+     * Resets/updates in case of stereo switching
+     *----------------------------------------------------------------*/
+
+    stereo_switching_enc( hCPE, sts[0]->old_input_signal, input_frame );
+
+    /*----------------------------------------------------------------*
+     * Temporal inter-channel alignment, stereo adjustment
+     *----------------------------------------------------------------*/
+
+    stereo_tca_enc( hCPE, input_frame );
+
+    /*----------------------------------------------------------------*
+     * Input signal buffering - needed in IC-BWE and TD ITD in MDCT stereo
+     *----------------------------------------------------------------*/
+
+    for ( n = 0; n < CPE_CHANNELS; n++ )
+    {
+        mvr2r( sts[n]->input, orig_input[n], input_frame );
+
+        if ( hCPE->hStereoICBWE != NULL )
+        {
+            hCPE->hStereoICBWE->dataChan[n] = &orig_input[n][0];
+        }
+    }
+
+    /*---------------------------------------------------------------*
+     * Time Domain Transient Detector
+     *---------------------------------------------------------------*/
+
+    for ( n = 0; n < CPE_CHANNELS; n++ )
+    {
+        if ( sts[n]->hTranDet == NULL )
+        {
+            currFlatness[n] = 0;
+            continue;
+        }
+
+        if ( !( ivas_format == MC_FORMAT && st_ivas->mc_mode == MC_MODE_PARAMMC ) )
+        {
+            RunTransientDetection( sts[n]->input, input_frame, sts[n]->hTranDet );
+        }
+        currFlatness[n] = GetTCXAvgTemporalFlatnessMeasure( sts[n]->hTranDet, NSUBBLOCKS, 0 );
+    }
+
+    /* Synchronize detection for downmix-based stereo */
+    if ( hCPE->element_mode == IVAS_CPE_DFT || hCPE->element_mode == IVAS_CPE_TD )
+    {
+        set_transient_stereo( hCPE, currFlatness );
+    }
+
+    /*----------------------------------------------------------------*
+     * Configuration of stereo encoder
+     *----------------------------------------------------------------*/
+
+    for ( n = 0; n < n_CoreChannels; n++ )
+    {
+        /* Force to MODE1 in IVAS */
+        sts[n]->codec_mode = MODE1;
+
+        sts[n]->element_mode = hCPE->element_mode;
+    }
+
+
+    if ( hCPE->element_mode != IVAS_CPE_MDCT && ( hCPE->element_brate != hCPE->last_element_brate || hCPE->last_element_mode != hCPE->element_mode || sts[0]->ini_frame == 0 ||
+                                                  ( ivas_total_brate != st_ivas->hEncoderConfig->last_ivas_total_brate ) || sts[0]->last_core_brate <= SID_2k40 ) ) /* If the last frame was SID or NO_DATA, we need to run stereo_dft_config here since VAD decision is not known yet */
+    {
+        if ( st_ivas->hQMetaData != NULL )
+        {
+            if ( ivas_format == MASA_ISM_FORMAT && st_ivas->ism_mode != ISM_MODE_NONE )
+            {
+                stereo_dft_config( hCPE->hStereoDft == NULL ? NULL : hCPE->hStereoDft->hConfig, (int32_t) ( 0.70f * st_ivas->hQMetaData->bits_frame_nominal * FRAMES_PER_SEC ), &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal );
+            }
+            else
+            {
+                stereo_dft_config( hCPE->hStereoDft == NULL ? NULL : hCPE->hStereoDft->hConfig, st_ivas->hQMetaData->bits_frame_nominal * FRAMES_PER_SEC, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal );
+            }
+        }
+        else
+        {
+            /* note: "bits_frame_nominal" needed in TD stereo as well */
+            stereo_dft_config( hCPE->hStereoDft == NULL ? NULL : hCPE->hStereoDft->hConfig, hCPE->element_brate, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal );
+        }
+    }
+
+    if ( hCPE->element_mode == IVAS_CPE_TD )
+    {
+        if ( hCPE->hStereoTD->tdm_LRTD_flag )
+        {
+            sts[0]->bits_frame_nominal = (int16_t) ( ( hCPE->element_brate >> 1 ) / FRAMES_PER_SEC );
+            sts[1]->bits_frame_nominal = (int16_t) ( ( hCPE->element_brate >> 1 ) / FRAMES_PER_SEC );
+        }
+        else
+        {
+            stereo_dft_config( NULL, hCPE->element_brate, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal );
+        }
+    }
+
+    if ( hCPE->element_mode == IVAS_CPE_MDCT )
+    {
+        /* compute bit-rate surplus per channel in combined format coding */
+        int32_t brate_surplus[CPE_CHANNELS]; /* assigned only in the three ISM_MASA modes tested just below, and read only under the same test */
+        if ( st_ivas->ism_mode == ISM_MASA_MODE_MASA_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_PARAM_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_DISC )
+        {
+            brate_surplus[0] = ( ( hCPE->brate_surplus / FRAMES_PER_SEC ) >> 1 ) * FRAMES_PER_SEC;
+            brate_surplus[1] = hCPE->brate_surplus - brate_surplus[0];
+        }
+
+        /* this is just for initialization, the true values of "total_brate" and "bits_frame_channel" are set later */
+        for ( n = 0; n < n_CoreChannels; n++ )
+        {
+            if ( st_ivas->hMCT )
+            {
+                int16_t lfe_bits;
+                lfe_bits = ( ivas_format == MC_FORMAT && st_ivas->mc_mode == MC_MODE_MCT ? st_ivas->hLFE->lfe_bits : 0 );
+                sts[n]->total_brate = hCPE->element_brate;
+                sts[n]->bits_frame_nominal = (int16_t) ( hCPE->element_brate / FRAMES_PER_SEC );
+                sts[n]->bits_frame_channel = (int16_t) ( ( ivas_total_brate / FRAMES_PER_SEC - lfe_bits - nb_bits_metadata ) / st_ivas->hMCT->nchan_out_woLFE );
+            }
+            else
+            {
+                sts[n]->bits_frame_nominal = (int16_t) ( hCPE->element_brate / FRAMES_PER_SEC );
+                sts[n]->bits_frame_channel = (int16_t) ( ( hCPE->element_brate / FRAMES_PER_SEC ) / n_CoreChannels );
+                sts[n]->total_brate = hCPE->element_brate / n_CoreChannels;
+
+                /* subtract bit-rate for combined format coding */
+                if ( st_ivas->ism_mode == ISM_MASA_MODE_MASA_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_PARAM_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_DISC )
+                {
+                    sts[n]->bits_frame_channel += (int16_t) ( brate_surplus[n] / FRAMES_PER_SEC );
+                    sts[n]->total_brate += brate_surplus[n];
+                }
+            }
+        }
+
+        /* reconfiguration in case of bitrate switching */
+        if ( hCPE->element_brate != hCPE->last_element_brate && st_ivas->hMCT == NULL )
+        {
+            initMdctStereoEncData( hCPE->hStereoMdct, ivas_format, hCPE->element_mode, hCPE->element_brate, max_bwidth, 0, NULL, 0 );
+            hCPE->hStereoMdct->isSBAStereoMode = ( ( ivas_format == SBA_FORMAT || ivas_format == SBA_ISM_FORMAT ) && ( st_ivas->nchan_transport == 2 ) );
+        }
+    }
+
+    /*----------------------------------------------------------------*
+     * Stereo processing
+     * Stereo down-mix
+     *----------------------------------------------------------------*/
+
+    if ( hCPE->element_mode == IVAS_CPE_DFT )
+    {
+        stereo_dft_hybrid_ITD_flag( hCPE->hStereoDft->hConfig, input_Fs, hCPE->hStereoDft->hItd->hybrid_itd_max );
+
+        /* Time Domain ITD compensation using extrapolation */
+        stereo_td_itd( hCPE->hStereoDft->hItd, hCPE->hStereoDft->input_mem_itd, hCPE->hStereoDft->hConfig->hybrid_itd_flag, hCPE->hStereoDft->dft_ovl, sts, input_frame, hCPE->input_mem );
+
+        /* DFT on right and left input channels */
+        stereo_dft_enc_analyze( sts, CPE_CHANNELS, input_frame, hCPE->hStereoDft, NULL, hCPE->hStereoDft->DFT, hCPE->input_mem );
+
+        sts[0]->total_brate = ( sts[0]->bits_frame_nominal + 10 ) * FRAMES_PER_SEC; /* add small overhead; st[0]->total_brate used in coder_type_modif() */
+        /* Update DFT Stereo memories */
+        stereo_dft_enc_update( hCPE->hStereoDft, sts[0]->max_bwidth );
+
+        /* DFT stereo processing */
+        stereo_dft_enc_process( hCPE, vad_flag_dtx, vad_hover_flag, input_frame );
+    }
+    else if ( hCPE->element_mode == IVAS_CPE_TD )
+    {
+        /* Determine the energy ratio between the 2 channels */
+        tdm_ratio_idx = stereo_tdm_ener_analysis(
+            ivas_format,
+            hCPE, input_frame, &tdm_SM_or_LRTD_Pri, &tdm_ratio_idx_SM );
+
+        /* Compute the downmix signal based on the ratio index */
+        stereo_tdm_downmix( hCPE->hStereoTD, sts[0]->input, sts[1]->input, input_frame, tdm_ratio_idx, ( ( hCPE->hStereoTD->tdm_LRTD_flag == 0 ) ? tdm_SM_or_LRTD_Pri : 0 ), tdm_ratio_idx_SM );
+
+        /* signal the bitrate for BW selection in the SCh */
+        sts[0]->bits_frame_channel = 0;
+        sts[1]->bits_frame_channel = (int16_t) ( hCPE->element_brate / FRAMES_PER_SEC );
+        sts[1]->bits_frame_channel += (int16_t) ( hCPE->brate_surplus / FRAMES_PER_SEC );
+        if ( st_ivas->hQMetaData != NULL )
+        {
+            sts[1]->bits_frame_channel -= st_ivas->hQMetaData->metadata_max_bits;
+        }
+
+        Etot_last[0] = sts[0]->hNoiseEst->Etot_last;
+        Etot_last[1] = sts[1]->hNoiseEst->Etot_last;
+    }
+    else if ( hCPE->element_mode == IVAS_CPE_MDCT )
+    {
+        stereo_td_itd_mdct_stereo( hCPE, vad_flag_dtx, vad_hover_flag, input_frame );
+    }
+
+    /*----------------------------------------------------------------*
+     * DFT stereo: iDFT and resampling on both channels
+     *----------------------------------------------------------------*/
+
+    if ( hCPE->element_mode == IVAS_CPE_DFT )
+    {
+        int32_t internal_Fs;
+
+        internal_Fs = getTcxonly_ivas( IVAS_CPE_MDCT, sts[0]->bits_frame_nominal * FRAMES_PER_SEC, 0, sts[0]->is_ism_format ) == 0 ? INT_FS_16k : max( INT_FS_16k, sts[0]->sr_core );
+
+        /* iDFT at input sampling rate */
+        stereo_dft_enc_synthesize( hCPE->hStereoDft, sts[0]->input, 0, input_Fs, input_Fs, 0 );
+
+        /* iDFT & resampling to 12.8kHz internal sampling rate */
+        stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_12k8[0] + L_INP_MEM, 0, input_Fs, INT_FS_12k8, 0 );
+
+        /* iDFT & resampling to 16kHz internal sampling rate for M channel */
+        if ( input_Fs == internal_Fs )
+        {
+            mvr2r( sts[0]->input - STEREO_DFT_OVL_16k, old_inp_16k[0] + L_INP_MEM - STEREO_DFT_OVL_16k, input_frame + STEREO_DFT_OVL_16k );
+        }
+        else
+        {
+            stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_16k[0] + L_INP_MEM, 0, input_Fs, internal_Fs, 0 );
+        }
+
+        /* DFT Stereo: iDFT of residual signal at 8kHz sampling rate */
+        if ( hCPE->element_mode == IVAS_CPE_DFT && hCPE->hStereoDft->res_cod_mode[STEREO_DFT_OFFSET] )
+        {
+            mvr2r( sts[1]->old_inp_12k8, old_inp_12k8[1], L_INP_MEM );
+            stereo_dft_enc_synthesize( hCPE->hStereoDft, old_inp_12k8[1] + L_INP_MEM, 1, input_Fs, 8000, 0 );
+
+            /* update old input signal buffer */
+            mvr2r( old_inp_12k8[1] + L_FRAME8k, sts[1]->old_inp_12k8, L_INP_MEM );
+        }
+
+        /* no iDFT at input sampling rate for Side channel -> reset the buffer */
+        set_zero( sts[1]->input, input_frame );
+    }
+
+
+    /*----------------------------------------------------------------*
+     * Front Pre-processing
+     *----------------------------------------------------------------*/
+
+    for ( n = 0; n < n_CoreChannels; n++ )
+    {
+        error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n],
+                                     &ener[n], &relE[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], &vad_hover_flag[n], &attack_flag[n],
+                                     realBuffer[n], imagBuffer[n], old_wsp[n], pitch_fr[n], voicing_fr[n], &loc_harm[n], &cor_map_sum[n], &vad_flag_dtx[n], enerBuffer[n],
+                                     fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_format, st_ivas->hMCT != NULL, ivas_total_brate );
+        if ( error != IVAS_ERR_OK )
+        {
+            return error;
+        }
+    }
+
+    /* sanity check -> DTX not supported for more than one SCEs/CPEs */
+    if ( st_ivas->nSCE + st_ivas->nCPE > 1 )
+    {
+        if ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA )
+        {
+            sts[0]->core_brate = -1;
+            sts[0]->total_brate = hCPE->element_brate;
+        }
+    }
+
+    /*----------------------------------------------------------------*
+     * Stereo DTX updates
+     *----------------------------------------------------------------*/
+
+    if ( ivas_format == MASA_FORMAT && nb_bits_metadata > 0 && hCPE->hCoreCoder[0]->Opt_DTX_ON )
+    {
+        if ( hCPE->element_mode == IVAS_CPE_DFT || hCPE->element_mode == IVAS_CPE_TD )
+        {
+            reset_metadata_spatial( ivas_format, hCPE->hMetaData, hCPE->element_brate, &tmp, sts[0]->core_brate, nb_bits_metadata );
+        }
+    }
+
+    /* MDCT stereo DTX: active/inactive frame decision; compute FD CNG coherence */
+    if ( hCPE->element_mode == IVAS_CPE_MDCT && hEncoderConfig->Opt_DTX_ON )
+    {
+        stereoFdCngCoherence( sts, hCPE->last_element_mode, fft_buff );
+
+        /* Reset metadata */
+        if ( sts[0]->cng_sba_flag || ( ivas_format == SBA_FORMAT ) )
+        {
+            reset_metadata_spatial( ivas_format, hCPE->hMetaData, hCPE->element_brate, &tmp, sts[0]->core_brate, nb_bits_metadata );
+        }
+    }
+
+    /*----------------------------------------------------------------*
+     * Core codec configuration
+     *----------------------------------------------------------------*/
+
+    /* IGF reconfiguration */
+    for ( n = 0; n < n_CoreChannels; n++ )
+    {
+        if ( ( hCPE->last_element_brate != hCPE->element_brate || hCPE->element_mode != hCPE->last_element_mode || ( hCPE->element_mode == IVAS_CPE_TD && sts[0]->bits_frame_nominal != last_bits_frame_nominal ) || sts[n]->last_bwidth != sts[n]->bwidth ) && ( n == 0 || hCPE->element_mode == IVAS_CPE_MDCT ) )
+        {
+            int16_t igf;
+            igf = getIgfPresent( sts[n]->element_mode, sts[n]->bits_frame_nominal * FRAMES_PER_SEC, sts[n]->max_bwidth, sts[n]->rf_mode );
+            if ( ( error = IGF_Reconfig( &sts[n]->hIGFEnc, igf, 0, sts[n]->bits_frame_nominal * FRAMES_PER_SEC, sts[n]->max_bwidth, sts[n]->element_mode, sts[n]->rf_mode ) ) != IVAS_ERR_OK )
+            {
+                return error;
+            }
+        }
+    }
+
+    if ( hCPE->element_mode == IVAS_CPE_MDCT && st_ivas->hMCT == NULL )
+    {
+        /* set coded BW for MDCT stereo */
+        set_bw_stereo( hCPE );
+
+        /* reconfiguration of MDCT stereo */
+        if ( sts[0]->bwidth != sts[0]->last_bwidth || ( ( hCPE->last_element_brate != hCPE->element_brate || hCPE->last_element_mode != hCPE->element_mode ) && sts[0]->bwidth != sts[0]->max_bwidth ) )
+        {
+            initMdctStereoEncData( hCPE->hStereoMdct, ivas_format, hCPE->element_mode, hCPE->element_brate, sts[0]->bwidth, 0, NULL, 0 );
+            hCPE->hStereoMdct->isSBAStereoMode = ( ( ivas_format == SBA_FORMAT || ivas_format == SBA_ISM_FORMAT ) && ( st_ivas->nchan_transport == 2 ) );
+
+            if ( hCPE->element_brate <= MAX_MDCT_ITD_BRATE && ivas_format == STEREO_FORMAT )
+            {
+                if ( ( error = initMdctItdHandling( hCPE->hStereoMdct, input_Fs ) ) != IVAS_ERR_OK )
+                {
+                    return error;
+                }
+            }
+        }
+    }
+
+    /* set ACELP@12k8 / ACELP@16k flag for flexible ACELP core */
+    for ( n = 0; n < n_CoreChannels; n++ )
+    {
+        if ( ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA ) && hCPE->element_mode == IVAS_CPE_DFT )
+        {
+            sts[n]->flag_ACELP16k = set_ACELP_flag_IVAS( hCPE->element_mode, hCPE->element_brate, sts[n]->core_brate, n, sts[0]->tdm_LRTD_flag, sts[n]->bwidth, sts[n]->cng_type );
+        }
+        else
+        {
+            sts[n]->flag_ACELP16k = set_ACELP_flag_IVAS( hCPE->element_mode, hCPE->element_brate, sts[n]->total_brate, n, sts[0]->tdm_LRTD_flag, sts[n]->bwidth, sts[n]->cng_type );
+        }
+    }
+
+    /* configure TD stereo encoder */
+    if ( hCPE->element_mode == IVAS_CPE_TD )
+    {
+        tdm_ol_pitch_comparison( hCPE, pitch_fr, voicing_fr );
+
+        tdm_configure_enc( ivas_format, st_ivas->ism_mode, hCPE, Etot_last, tdm_SM_or_LRTD_Pri, tdm_ratio_idx, tdm_ratio_idx_SM, attack_flag[0], nb_bits_metadata );
+
+        if ( hEncoderConfig->Opt_DTX_ON )
+        {
+            stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, -1, NULL, sts[0]->hTdCngEnc->burst_ho_cnt, NULL );
+        }
+    }
+
+    /* modify the coder_type depending on the total_brate per channel */
+    for ( n = 0; n < n_CoreChannels; n++ )
+    {
+        if ( ( hCPE->element_mode != IVAS_CPE_DFT && hCPE->element_mode != IVAS_CPE_TD ) || n == 0 ) /* modify coder_type of primary channel */
+        {
+            /* limit coder_type depending on the bitrate */
+            coder_type_modif( sts[n], relE[n] );
+        }
+    }
+
+    /*----------------------------------------------------------------*
+     * Write IVAS format signaling in SID frames
+     *----------------------------------------------------------------*/
+
+    if ( sts[0]->core_brate == SID_2k40 )
+    {
+        ivas_write_format_sid( ivas_format, hCPE->element_mode, sts[0]->hBstr );
+    }
+
+    /*----------------------------------------------------------------*
+     * DFT Stereo residual coding
+     * DFT Stereo parameters writing into the bitstream
+     *----------------------------------------------------------------*/
+
+    cpe_brate = 0;
+    if ( hCPE->element_mode == IVAS_CPE_DFT )
+    {
+        if ( hEncoderConfig->Opt_DTX_ON )
+        {
+            if ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA )
+            {
+                /* Reconfigure DFT Stereo for inactive frames */
+                if ( sts[0]->core_brate == SID_2k40 )
+                {
+                    stereo_dft_config( hCPE->hStereoDft->hConfig, IVAS_SID_5k2, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal );
+                }
+                else
+                {
+                    stereo_dft_config( hCPE->hStereoDft->hConfig, FRAME_NO_DATA, &sts[0]->bits_frame_nominal, &sts[1]->bits_frame_nominal );
+                }
+
+                stereo_dft_cng_side_gain( hCPE->hStereoDft, hCPE->hStereoCng, sts[0]->core_brate, sts[0]->last_core_brate, sts[0]->bwidth );
+            }
+            else
+            {
+                stereo_cng_upd_counters( hCPE->hStereoCng, hCPE->element_mode, hCPE->hStereoDft->nbands, hCPE->hStereoDft->sidSideGain, sts[0]->hTdCngEnc->burst_ho_cnt, &hCPE->hStereoDft->coh_fade_counter );
+            }
+        }
+
+        /* Write stereo bitstream; NOTE(review): the rate below is read from hCPE[0], not from this element (cpe_id) - confirm this is intended */
+        cpe_brate = st_ivas->hCPE[0]->element_brate;
+
+        /* DFT stereo side bits */
+        if ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && cpe_brate < MASA_STEREO_MIN_BITRATE && sts[0]->core_brate != SID_2k40 && sts[0]->core_brate != FRAME_NO_DATA )
+        {
+            nb_bits = 0; /* Only mono downmix is transmitted in this case */
+        }
+        else if ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA ) )
+        {
+            nb_bits = hCPE->hMetaData->nb_bits_tot;
+        }
+        else
+        {
+            stereo_dft_enc_write_BS( hCPE, &nb_bits );
+        }
+
+        /* Residual coding in MDCT domain */
+        if ( !( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && ( sts[0]->core_brate == SID_2k40 || sts[0]->core_brate == FRAME_NO_DATA ) ) )
+        {
+            int16_t max_bits = (int16_t) ( hCPE->element_brate / FRAMES_PER_SEC - 0.8f * sts[0]->bits_frame_nominal );
+            if ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT )
+            {
+                max_bits -= nb_bits_metadata;
+                if ( hCPE->brate_surplus < 0 )
+                {
+                    max_bits += (int16_t) ( hCPE->brate_surplus / FRAMES_PER_SEC );
+                }
+            }
+
+            stereo_dft_enc_res( hCPE->hStereoDft, old_inp_12k8[1] + L_INP_MEM - STEREO_DFT_OVL_8k, hCPE->hMetaData, &nb_bits, max_bits );
+        }
+
+        if ( sts[0]->core_brate == FRAME_NO_DATA || sts[0]->core_brate == SID_2k40 )
+        {
+            assert( ( nb_bits <= ( ( IVAS_SID_5k2 - SID_2k40 ) / FRAMES_PER_SEC - SID_FORMAT_NBITS ) ) && "Stereo DFT CNG: bit budget is violated" );
+        }
+        else
+        {
+            /* Flexible total bitrate in M channel */
+            sts[0]->total_brate = hCPE->element_brate - ( nb_bits * FRAMES_PER_SEC );
+        }
+
+        /* subtract metadata bitbudget */
+        sts[0]->total_brate -= ( nb_bits_metadata * FRAMES_PER_SEC );
+
+        /* subtract bit-rate for combined format coding */
+        if ( ivas_format == MASA_ISM_FORMAT && ( st_ivas->ism_mode == ISM_MASA_MODE_MASA_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_PARAM_ONE_OBJ || st_ivas->ism_mode == ISM_MASA_MODE_DISC ) )
+        {
+            sts[0]->total_brate += hCPE->brate_surplus;
+        }
+    }
+
+
+    /*----------------------------------------------------------------*
+     * Core Encoder
+     *----------------------------------------------------------------*/
+
+    if ( ( error = ivas_core_enc( NULL, hCPE, st_ivas->hMCT, n_CoreChannels, old_inp_12k8, old_inp_16k, ener, A, Aw, epsP, lsp_new, lsp_mid, vad_hover_flag, attack_flag, realBuffer, imagBuffer, old_wsp, loc_harm, cor_map_sum, vad_flag_dtx, enerBuffer, fft_buff, tdm_SM_or_LRTD_Pri, ivas_format, 0 ) ) != IVAS_ERR_OK )
+    {
+        return error;
+    }
+
+    /*----------------------------------------------------------------*
+     * Common updates
+     *----------------------------------------------------------------*/
+
+    hCPE->last_element_brate = hCPE->element_brate;
+    hCPE->last_element_mode = hCPE->element_mode;
+
+    if ( ivas_format == MASA_ISM_FORMAT )
+    {
+        hCPE->element_brate = element_brate_ref;
+    }
+
+    if ( hCPE->element_mode == IVAS_CPE_MDCT && hCPE->hStereoMdct != NULL && hCPE->hStereoMdct->hItd != NULL )
+    {
+        /* update input samples buffer */
+        for ( n = 0; n < CPE_CHANNELS; n++ )
+        {
+            mvr2r( orig_input[n], sts[n]->old_input_signal, input_frame );
+        }
+    }
+    else if ( hCPE->element_mode == IVAS_CPE_DFT )
+    {
+        mvr2r( sts[0]->input, sts[0]->old_input_signal, input_frame );
+    }
+    else if ( st_ivas->hMCT == NULL ) /* note: in MCT, input buffers are updated later in ivas_mct_enc() */
+    {
+        /* update input samples buffer */
+        for ( n = 0; n < CPE_CHANNELS; n++ )
+        {
+            mvr2r( sts[n]->input, sts[n]->old_input_signal, input_frame );
+        }
+    }
+
+    if ( hCPE->hFrontVad[0] != NULL )
+    {
+        hCPE->hFrontVad[0]->ini_frame++;
+        hCPE->hFrontVad[0]->ini_frame = min( hCPE->hFrontVad[0]->ini_frame, MAX_FRAME_COUNTER );
+    }
+
+    /* Store previous attack detection flag; NOTE(review): hTranDet is dereferenced here without the NULL check used in the transient detector loop - confirm it is always allocated */
+    for ( n = 0; n < CPE_CHANNELS; n++ )
+    {
+        sts[n]->hTranDet->transientDetector.prev_bIsAttackPresent = sts[n]->hTranDet->transientDetector.bIsAttackPresent;
+    }
+
+
+    pop_wmops();
+    return error;
+}
+#endif
 
 /*-------------------------------------------------------------------------
  * create_cpe_enc()
diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h
index fa8970e00..36fa5a0c7 100644
--- a/lib_enc/ivas_stat_enc.h
+++ b/lib_enc/ivas_stat_enc.h
@@ -152,7 +152,8 @@ typedef struct stereo_dft_enc_data_struct
     /*FFT*/
 #ifdef IVAS_FLOAT_FIXED
     Word32 DFT_fx[CPE_CHANNELS][STEREO_DFT_N_MAX_ENC];
-    Word16 DFT_q_fx[CLDFB_NO_CHANNELS_MAX];
+    Word16 DFT_fx_e[CPE_CHANNELS];
+    // Word16 DFT_q_fx[CLDFB_NO_CHANNELS_MAX];
 #endif
     float DFT[CPE_CHANNELS][STEREO_DFT_N_MAX_ENC];
     int16_t dft_ovl; /* Overlap size */
@@ -307,6 +308,12 @@ typedef struct stereo_dft_enc_data_struct
     /*misc*/
     float icbweRefEner;
     float lbEner;
+#ifdef IVAS_FLOAT_FIXED
+    Word32 icbweRefEner_fx;
+    Word16 icbweRefEner_fx_e;
+    Word32 lbEner_fx;
+    Word16 lbEner_fx_e;
+#endif
     int16_t flip_sign;
     Word32 dmx_res_all_prev_fx;   /* energy of the previous frame Q31*/
     Word16 switch_fade_factor_fx; /* Adaptive fade factor for switch frame Q15*/
@@ -352,7 +359,7 @@ typedef struct stereo_dft_enc_data_struct
     const Word16 *dft_trigo_16k_fx;
     const Word16 *dft_trigo_32k_fx;
 
-    Word32 output_mem_res_8k_fx[STEREO_DFT_OVL_8k];
+    Word32 output_mem_res_8k_fx[STEREO_DFT_OVL_8k]; // Q16
 
     Word32 res_cod_NRG_M_fx[STEREO_DFT_BAND_MAX];
     Word32 res_cod_NRG_S_fx[STEREO_DFT_BAND_MAX];
@@ -362,11 +369,11 @@ typedef struct stereo_dft_enc_data_struct
     Word32 past_nrgR_fx[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX];
     Word32 past_dot_prod_real_fx[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX];
     Word32 past_dot_prod_imag_fx[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX];
-    Word32 output_mem_dmx_fx[STEREO_DFT_OVL_MAX];
-    Word32 output_mem_dmx_12k8_fx[STEREO_DFT_OVL_12k8];
-    Word32 output_mem_dmx_16k_fx[STEREO_DFT_OVL_16k]; /*can hold 16, 12.8 or 32kHz signals*/
-    Word32 output_mem_dmx_32k_fx[STEREO_DFT_OVL_32k]; /*can hold 16, 12.8 or 32kHz signals*/
-    Word32 output_mem_dmx_16k_shb_fx[STEREO_DFT_OVL_16k];
+    Word32 output_mem_dmx_fx[STEREO_DFT_OVL_MAX];                                            // Q16
+    Word32 output_mem_dmx_12k8_fx[STEREO_DFT_OVL_12k8];                                      // Q16
+    Word32 output_mem_dmx_16k_fx[STEREO_DFT_OVL_16k]; /*can hold 16, 12.8 or 32kHz signals*/ // Q16
+    Word32 output_mem_dmx_32k_fx[STEREO_DFT_OVL_32k]; /*can hold 16, 12.8 or 32kHz signals*/ // Q16
+    Word32 output_mem_dmx_16k_shb_fx[STEREO_DFT_OVL_16k];                                    // Q16
     Word32 input_mem_itd_fx[CPE_CHANNELS][STEREO_DFT_OVL_MAX];
     Word32 gipd_fx[STEREO_DFT_ENC_DFT_NB];
 
diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c
index 46db4d6a5..8fb7d28c8 100644
--- a/lib_enc/ivas_stereo_dft_enc.c
+++ b/lib_enc/ivas_stereo_dft_enc.c
@@ -421,6 +421,55 @@ static void stereo_dft_enc_open(
     set_zero( hStereoDft->output_mem_dmx_16k_shb, STEREO_DFT_OVL_16k );
     set_zero( hStereoDft->output_mem_res_8k, STEREO_DFT_OVL_8k );
 
+#ifdef IVAS_FLOAT_FIXED
+    hStereoDft->dft_trigo_8k_fx = dft_trigo_32k_fx;
+    hStereoDft->dft_trigo_12k8_fx = dft_trigo_12k8_fx;
+    hStereoDft->dft_trigo_16k_fx = dft_trigo_32k_fx;
+    hStereoDft->dft_trigo_32k_fx = dft_trigo_32k_fx;
+
+    hStereoDft->win_ana_8k_fx = win_ana_8k_fx;
+    hStereoDft->win_ana_12k8_fx = win_ana_12k8_fx;
+    hStereoDft->win_ana_16k_fx = win_ana_16k_fx;
+    hStereoDft->win_ana_32k_fx = win_ana_32k_fx;
+
+    hStereoDft->win_8k_fx = win_syn_8k_fx;
+    hStereoDft->win_12k8_fx = win_syn_12k8_fx;
+    hStereoDft->win_16k_fx = win_syn_16k_fx;
+    hStereoDft->win_32k_fx = win_syn_32k_fx;
+
+    IF( EQ_32( input_Fs, 16000 ) )
+    {
+        hStereoDft->dft_trigo_fx = dft_trigo_32k_fx;
+        hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP;
+        hStereoDft->win_ana_fx = win_ana_16k_fx;
+        hStereoDft->win_fx = win_syn_16k_fx;
+    }
+    ELSE IF( EQ_32( input_Fs, 32000 ) )
+    {
+        hStereoDft->dft_trigo_fx = dft_trigo_32k_fx;
+        hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_32k_STEP;
+        hStereoDft->win_ana_fx = win_ana_32k_fx;
+        hStereoDft->win_fx = win_syn_32k_fx;
+    }
+    ELSE
+    {
+        assert( EQ_32( input_Fs, 48000 ) );
+        hStereoDft->dft_trigo_fx = dft_trigo_48k_fx;
+        hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_48k_STEP;
+        hStereoDft->win_ana_fx = win_ana_48k_fx;
+        hStereoDft->win_fx = win_syn_48k_fx;
+    }
+
+    hStereoDft->win_mdct_8k_fx = win_mdct_8k_fx;
+
+    /*I/O Buffers*/
+    set_zero_fx( hStereoDft->output_mem_dmx_fx, STEREO_DFT_OVL_MAX );
+    set_zero_fx( hStereoDft->output_mem_dmx_12k8_fx, STEREO_DFT_OVL_12k8 );
+    set_zero_fx( hStereoDft->output_mem_dmx_16k_fx, STEREO_DFT_OVL_16k );
+    set_zero_fx( hStereoDft->output_mem_dmx_16k_shb_fx, STEREO_DFT_OVL_16k );
+    set_zero_fx( hStereoDft->output_mem_res_8k_fx, STEREO_DFT_OVL_8k );
+#endif
+
     /*Bands: find the number of bands, Nyquist freq. is not taken into account*/
     NFFT_inner = STEREO_DFT_N_MAX_ENC * inner_frame_tbl[max_bwidth] / L_FRAME48k;
 #ifndef IVAS_FLOAT_FIXED
@@ -461,17 +510,15 @@ static void stereo_dft_enc_open_fx(
     Word16 win[STEREO_DFT_OVL_MAX];
 
     /*Sizes*/
-    /* input_Fs / 48000 */
-    Word16 input_Fs_48k = extract_l( Mpy_32_32( input_Fs, 44739 /* 1 / 48000 in Q31 */ ) );
 
-    // input_Fs_48k = shr(input_Fs_48k, sub(15, div_e));
-
-    hStereoDft->N = i_mult( STEREO_DFT_HOP_MAX_ENC, input_Fs_48k ); // e = div_e
+    hStereoDft->N = extract_l( Mpy_32_32( input_Fs, 42949673 /* STEREO_DFT_HOP_MAX_ENC / 48000 in Q31 */ ) ); // e = div_e
     assert( ( ( input_Fs / FRAMES_PER_SEC ) / hStereoDft->N ) == 1 );
 
     /*Init. DFT sizes*/
-    hStereoDft->NFFT = i_mult( STEREO_DFT_N_MAX_ENC, input_Fs_48k );                                   // e = div_e
-    hStereoDft->dft_ovl = i_mult( STEREO_DFT_OVL_MAX, input_Fs_48k );                                  // e = div_e
+    /* NFFT = STEREO_DFT_N_MAX_ENC * input_Fs / 48000; 85899346 / 2^31 = 0.04, presumably STEREO_DFT_N_MAX_ENC / 48000 in Q31 */
+    hStereoDft->NFFT = extract_l( Mpy_32_32( input_Fs, 85899346 ) ); // e = div_e
+    /* dft_ovl = STEREO_DFT_OVL_MAX * input_Fs / 48000; 18790482 / 2^31 = 0.00875, presumably STEREO_DFT_OVL_MAX / 48000 in Q31 */
+    hStereoDft->dft_ovl = extract_l( Mpy_32_32( input_Fs, 18790482 ) );                                // e = div_e
     mdct_window_sine_IVAS_updated( win_p, input_Fs, hStereoDft->dft_ovl, FULL_OVERLAP, IVAS_CPE_DFT ); // win_e = 15
     FOR( Word16 i = 0; i < shr( STEREO_DFT_OVL_MAX, 1 ); i++ )
     {
@@ -1115,6 +1162,423 @@ void stereo_dft_enc_analyze(
  * Inverse DFT on a 20ms frame
  *-------------------------------------------------------------------------*/
 
+#ifdef IVAS_FLOAT_FIXED
+Word32 stereo_dft_enc_synthesize_fx(
+    STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle       */
+    // float *output,                         /* o  : output synthesis            */
+    Word32 *output_fx, /* o  : output synthesis           Q16 */
+    Word16 *output_start_index, /* o  : first written index relative to output_fx (set to -ovl) */
+    Word16 *output_end_index, /* o  : set to offset + ovl + N, i.e. one past the last written index */
+    const Word16 chan,                 /* i  : channel number              */
+    const Word32 input_Fs,             /* i  : input sampling rate         */
+    const Word32 output_sampling_rate, /* i  : output sampling rate        */
+    const Word16 L_frame,              /* i  : frame length at internal Fs */
+    Word16 *nrg_out_fx_e ) /* o  : exponent of the returned energy; may be NULL (checked before the write) */
+{
+    Word16 i, j, sign;
+    // float *pDFT_in;
+    Word32 *pDFT_in_fx;
+    Word16 DFT_in_fx_e; /* exponent of the spectrum in pDFT_in_fx / tmp_fx, updated on every rescale below */
+    Word16 offset, NFFT, N, ovl, zp;
+    Word16 temp_exp;
+    // float fac;
+    Word32 fac_fx;
+    // float *mem;
+    Word32 *mem_fx;
+    // const float *trigo, *win, *win_ana;
+    const Word16 *trigo_fx;
+    const Word32 *win_fx, *win_ana_fx;
+    // float tmp[STEREO_DFT_N_MAX_ENC];
+    Word32 tmp_fx[STEREO_DFT_N_MAX_ENC];
+    // float nrg;
+    Word32 nrg_fx;
+    Word16 nrg_fx_e;
+    // float trigo_enc[STEREO_DFT_N_MAX_ENC / 2 + 1];
+    Word16 trigo_enc_fx[STEREO_DFT_N_MAX_ENC / 2 + 1];
+    Word16 trigo_step;
+    Word16 scal_fac;
+    /* Summary: inverse real FFT of hStereoDft->DFT_fx[chan] (optionally rescaled/flipped), written to output_fx[-ovl .. N-1]. */
+    /* Returns the mantissa of the energy summed over tmp_fx[0..321]; it stays 0 unless L_frame is L_FRAME or L_FRAME16k. */
+
+    /*-----------------------------------------------------------------*
+     * Initialization
+     *-----------------------------------------------------------------*/
+
+    IF( L_frame > 0 )
+    {
+        assert( ( output_sampling_rate == 16000 ) && "High-band generation only possible at 16kHz!" );
+    }
+    nrg_fx = 0;
+    move32();
+    nrg_fx_e = 0;
+    move16();
+
+    hStereoDft->icbweRefEner_fx = 0; /* both band energies are reset on every call and only refilled in the SHB case */
+    move32();
+    hStereoDft->icbweRefEner_fx_e = 0;
+    move16();
+    hStereoDft->lbEner_fx = 0;
+    move32();
+    hStereoDft->lbEner_fx_e = 0;
+    move16();
+
+    IF( chan == 0 )
+    {
+        pDFT_in_fx = hStereoDft->DFT_fx[0];
+        DFT_in_fx_e = hStereoDft->DFT_fx_e[0];
+        move16();
+    }
+    ELSE
+    {
+        pDFT_in_fx = hStereoDft->DFT_fx[1];
+        DFT_in_fx_e = hStereoDft->DFT_fx_e[1];
+        move16();
+    }
+
+    IF( EQ_32( output_sampling_rate, input_Fs ) )
+    {
+        assert( chan == 0 );
+
+        NFFT = hStereoDft->NFFT;
+        move16();
+        fac_fx = MAX_32; /* sentinel for "no rescaling": tested with NE_32( fac_fx, MAX_32 ) further down */
+        move32();
+        N = hStereoDft->N;
+        move16();
+        ovl = hStereoDft->dft_ovl;
+        move16();
+        zp = hStereoDft->dft_zp;
+        move16();
+        trigo_fx = hStereoDft->dft_trigo_fx;
+        trigo_step = hStereoDft->dft_trigo_step;
+        move16();
+        IF( L_frame > 0 )
+        {
+            mem_fx = hStereoDft->output_mem_dmx_16k_shb_fx;
+        }
+        ELSE
+        {
+            mem_fx = hStereoDft->output_mem_dmx_fx;
+        }
+        win_fx = hStereoDft->win_fx;
+        win_ana_fx = hStereoDft->win_ana_fx;
+
+        push_wmops( "DFT_synth_fs" );
+    }
+    ELSE IF( EQ_32( output_sampling_rate, INT_FS_12k8 ) )
+    {
+        assert( chan == 0 );
+
+        NFFT = STEREO_DFT_N_12k8_ENC;
+        move16();
+        N = STEREO_DFT_HOP_12k8_ENC;
+        move16();
+        zp = STEREO_DFT_ZP_12k8_ENC;
+        move16();
+        // fac = (float) ( NFFT ) / (float) ( hStereoDft->NFFT );
+        fac_fx = BASOP_Util_Divide3232_Scale_cadence( NFFT, hStereoDft->NFFT, &temp_exp );
+        fac_fx = L_shl( fac_fx, temp_exp ); /* apply the exponent reported by the division to the mantissa */
+        ovl = STEREO_DFT_OVL_12k8;
+        move16();
+        trigo_fx = hStereoDft->dft_trigo_12k8_fx;
+        trigo_step = STEREO_DFT_TRIGO_SRATE_12k8_STEP;
+        move16();
+        mem_fx = hStereoDft->output_mem_dmx_12k8_fx;
+        win_fx = hStereoDft->win_12k8_fx;
+        win_ana_fx = hStereoDft->win_ana_12k8_fx;
+
+        push_wmops( "DFT_synth_12k8" );
+    }
+    ELSE IF( EQ_32( output_sampling_rate, 16000 ) )
+    {
+        assert( chan == 0 );
+
+        NFFT = STEREO_DFT_N_16k_ENC;
+        move16();
+        N = STEREO_DFT_HOP_16k_ENC;
+        move16();
+        zp = STEREO_DFT_ZP_16k_ENC;
+        move16();
+        // fac = (float) ( NFFT ) / (float) ( hStereoDft->NFFT );
+        fac_fx = BASOP_Util_Divide3232_Scale_cadence( NFFT, hStereoDft->NFFT, &temp_exp );
+        fac_fx = L_shl( fac_fx, temp_exp );
+        ovl = STEREO_DFT_OVL_16k;
+        move16();
+        trigo_fx = hStereoDft->dft_trigo_16k_fx;
+        trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP;
+        move16();
+        IF( L_frame > 0 )
+        {
+            mem_fx = hStereoDft->output_mem_dmx_16k_shb_fx;
+
+            push_wmops( "DFT_synth_16k_shb" );
+        }
+        ELSE
+        {
+            mem_fx = hStereoDft->output_mem_dmx_16k_fx;
+
+            push_wmops( "DFT_synth_16k" );
+        }
+        win_fx = hStereoDft->win_16k_fx;
+        win_ana_fx = hStereoDft->win_ana_16k_fx;
+    }
+    ELSE IF( EQ_32( output_sampling_rate, 32000 ) )
+    {
+        assert( chan == 0 );
+
+        NFFT = STEREO_DFT_N_32k_ENC;
+        move16();
+        N = STEREO_DFT_HOP_32k_ENC;
+        move16();
+        zp = STEREO_DFT_ZP_32k_ENC;
+        move16();
+        // fac = (float) ( NFFT ) / (float) ( hStereoDft->NFFT );
+        fac_fx = BASOP_Util_Divide3232_Scale_cadence( NFFT, hStereoDft->NFFT, &temp_exp );
+        fac_fx = L_shl( fac_fx, temp_exp );
+        ovl = STEREO_DFT_OVL_32k;
+        move16();
+        trigo_fx = hStereoDft->dft_trigo_32k_fx;
+        trigo_step = STEREO_DFT_TRIGO_SRATE_32k_STEP;
+        move16();
+        mem_fx = hStereoDft->output_mem_dmx_32k_fx;
+        win_fx = hStereoDft->win_32k_fx;
+        win_ana_fx = hStereoDft->win_ana_32k_fx;
+
+        push_wmops( "DFT_synth_32k" );
+    }
+    ELSE IF( EQ_32( output_sampling_rate, 8000 ) )
+    {
+        assert( chan == 1 ); /* the 8 kHz path is the only one that reads DFT_fx[1] and uses the residual memory */
+
+        NFFT = STEREO_DFT_N_8k_ENC;
+        move16();
+        N = STEREO_DFT_HOP_8k_ENC;
+        move16();
+        zp = STEREO_DFT_ZP_8k_ENC;
+        move16();
+        // fac = (float) ( NFFT ) / (float) ( hStereoDft->NFFT );
+        fac_fx = BASOP_Util_Divide3232_Scale_cadence( NFFT, hStereoDft->NFFT, &temp_exp );
+        fac_fx = L_shl( fac_fx, temp_exp );
+        ovl = STEREO_DFT_OVL_8k;
+        move16();
+        trigo_fx = hStereoDft->dft_trigo_8k_fx;
+        trigo_step = STEREO_DFT_TRIGO_SRATE_8k_STEP;
+        move16();
+        mem_fx = hStereoDft->output_mem_res_8k_fx;
+        win_fx = hStereoDft->win_8k_fx;
+        win_ana_fx = hStereoDft->win_ana_8k_fx;
+
+        push_wmops( "DFT_synth_8k" );
+    }
+    ELSE
+    {
+        assert( 0 && "DFT stereo: sampling rate not supported!" ); /* NOTE(review): with asserts disabled, execution continues with NULL tables (trigo_fx is read below) and no push_wmops() for the final pop_wmops() */
+        NFFT = -1; /* to avoid compilation warning */
+        move16();
+        fac_fx = -1; /* to avoid compilation warning */
+        move32();
+        N = -1; /* to avoid compilation warning */
+        move16();
+        zp = -1; /* to avoid compilation warning */
+        move16();
+        win_fx = NULL;   /* to avoid compilation warning */
+        trigo_fx = NULL; /* to avoid compilation warning */
+        trigo_step = -1; /* to avoid compilation warning */
+        move16();
+        ovl = -1; /* to avoid compilation warning */
+        move16();
+        mem_fx = NULL;     /* to avoid compilation warning */
+        win_ana_fx = NULL; /* to avoid compilation warning */
+    }
+
+    offset = 0; /* overwritten with -ovl below before it is read */
+    move16();
+    /* build a mirrored table trigo_enc_fx[0..NFFT/2] from every trigo_step-th entry of trigo_fx */
+    FOR( i = 0; i < shr( NFFT, 2 ); i++ )
+    {
+        trigo_enc_fx[i] = trigo_fx[imult1616( i, trigo_step )];
+        move16();
+        trigo_enc_fx[sub( shr( NFFT, 1 ), i )] = trigo_fx[imult1616( i, trigo_step )];
+        move16();
+    }
+    trigo_enc_fx[shr( NFFT, 2 )] = trigo_fx[imult1616( shr( NFFT, 2 ), trigo_step )];
+    move16();
+
+    /*-----------------------------------------------------------------*
+     * Synthesizing & resampling
+     *-----------------------------------------------------------------*/
+
+    offset = negate( ovl ); /* output is written starting ovl samples before output_fx[0] */
+
+    test();
+    IF( EQ_16( L_frame, L_FRAME ) || EQ_16( L_frame, L_FRAME16k ) )
+    {
+        // for ( i = (int16_t) ( 200 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ); i < (int16_t) ( 400 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ); i++ )
+        FOR( i = 320; i < 640; i++ ) /* hard-coded bounds standing in for the float expression above */
+        {
+            // hStereoDft->icbweRefEner += pDFT_in[2 * i] * pDFT_in[2 * i] + pDFT_in[2 * i + 1] * pDFT_in[2 * i + 1];
+            hStereoDft->icbweRefEner_fx = BASOP_Util_Add_Mant32Exp( hStereoDft->icbweRefEner_fx, hStereoDft->icbweRefEner_fx_e, L_add( L_shr( Mpy_32_32( pDFT_in_fx[2 * i], pDFT_in_fx[2 * i] ), 1 ), L_shr( Mpy_32_32( pDFT_in_fx[2 * i + 1], pDFT_in_fx[2 * i + 1] ), 1 ) ), add( shl( DFT_in_fx_e, 1 ), 1 ), &hStereoDft->icbweRefEner_fx_e );
+            move32(); /* each square is halved before the add, hence the term exponent 2 * DFT_in_fx_e + 1 */
+        }
+        // for ( i = 0; i < (int16_t) ( 200 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ); i++ )
+        FOR( i = 0; i < 320; i++ )
+        {
+            // hStereoDft->lbEner += pDFT_in[2 * i] * pDFT_in[2 * i] + pDFT_in[2 * i + 1] * pDFT_in[2 * i + 1];
+            hStereoDft->lbEner_fx = BASOP_Util_Add_Mant32Exp( hStereoDft->lbEner_fx, hStereoDft->lbEner_fx_e, L_add( L_shr( Mpy_32_32( pDFT_in_fx[2 * i], pDFT_in_fx[2 * i] ), 1 ), L_shr( Mpy_32_32( pDFT_in_fx[2 * i + 1], pDFT_in_fx[2 * i + 1] ), 1 ) ), add( shl( DFT_in_fx_e, 1 ), 1 ), &hStereoDft->lbEner_fx_e );
+            move32();
+        }
+        hStereoDft->icbweRefEner_fx = Mpy_32_32( hStereoDft->icbweRefEner_fx, fac_fx ); /* mantissa scaled by fac_fx; the exponent field is left as is */
+        move32();
+        hStereoDft->lbEner_fx = Mpy_32_32( hStereoDft->lbEner_fx, fac_fx );
+        move32();
+    }
+
+    /*Flip?*/
+    set32_fx( tmp_fx, 0, STEREO_DFT_N_MAX_ENC );
+    IF( EQ_16( L_frame, L_FRAME ) )
+    {
+        /* 6 to 14 kHz SHB target signal*/
+        j = 2; /* tmp_fx[0] and tmp_fx[1] keep the zeros written by set32_fx */
+        move16();
+        sign = hStereoDft->flip_sign;
+        move16();
+
+        // for ( i = (int16_t) ( 350 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ); i >= (int16_t) 150 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ); i-- )
+        FOR( i = 560; i >= 240; i-- ) /* input bins are read in descending order, i.e. the band is mirrored */
+        {
+            /* alternate sign between frames for even starting index */
+            tmp_fx[j++] = W_extract_l( W_mult0_32_32( sign, Mpy_32_32( pDFT_in_fx[2 * i], fac_fx ) ) );
+            tmp_fx[j++] = W_extract_l( W_mult0_32_32( negate( sign ), Mpy_32_32( pDFT_in_fx[2 * i + 1], fac_fx ) ) );
+            move32();
+            move32();
+        }
+        scal_fac = sub( L_norm_arr( tmp_fx, STEREO_DFT_N_MAX_ENC ), 1 );
+        scale_sig32( tmp_fx, STEREO_DFT_N_MAX_ENC, scal_fac ); // not a fixed Q here: the shift is tracked in DFT_in_fx_e on the next line
+        DFT_in_fx_e = sub( DFT_in_fx_e, scal_fac );
+
+        hStereoDft->flip_sign = negate( sign ); /* persisted so the next call starts with the opposite sign */
+        move16();
+
+        // for ( i = 0; i <= (int16_t) 100 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ); i++ )
+        FOR( i = 0; i <= 160; i++ )
+        {
+            nrg_fx = BASOP_Util_Add_Mant32Exp( nrg_fx, nrg_fx_e, Mpy_32_32( tmp_fx[2 * i], tmp_fx[2 * i] ), shl( DFT_in_fx_e, 1 ), &nrg_fx_e );
+            nrg_fx = BASOP_Util_Add_Mant32Exp( nrg_fx, nrg_fx_e, Mpy_32_32( tmp_fx[2 * i + 1], tmp_fx[2 * i + 1] ), shl( DFT_in_fx_e, 1 ), &nrg_fx_e );
+        }
+    }
+    ELSE IF( EQ_16( L_frame, L_FRAME16k ) )
+    {
+        /* 7.5 - 15.5 kHz SHB target signal*/
+        j = 2;
+        move16();
+        // for ( i = (int16_t) ( 400 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) ) - 1; i >= (int16_t) 200 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ); i-- )
+        FOR( i = 640 - 1; i >= 320; i-- ) /* same mirroring as above, but without the per-frame sign alternation */
+        {
+            tmp_fx[j++] = Mpy_32_32( pDFT_in_fx[2 * i], fac_fx );
+            tmp_fx[j++] = L_negate( Mpy_32_32( pDFT_in_fx[2 * i + 1], fac_fx ) );
+            move32();
+            move32();
+        }
+        scal_fac = sub( L_norm_arr( tmp_fx, STEREO_DFT_N_MAX_ENC ), 1 );
+        scale_sig32( tmp_fx, STEREO_DFT_N_MAX_ENC, scal_fac ); // not a fixed Q here: the shift is tracked in DFT_in_fx_e on the next line
+        DFT_in_fx_e = sub( DFT_in_fx_e, scal_fac );
+
+        // for ( i = 0; i <= (int16_t) 100 * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ); i++ )
+        FOR( i = 0; i <= 160; i++ )
+        {
+            nrg_fx = BASOP_Util_Add_Mant32Exp( nrg_fx, nrg_fx_e, Mpy_32_32( tmp_fx[2 * i], tmp_fx[2 * i] ), shl( DFT_in_fx_e, 1 ), &nrg_fx_e );
+            nrg_fx = BASOP_Util_Add_Mant32Exp( nrg_fx, nrg_fx_e, Mpy_32_32( tmp_fx[2 * i + 1], tmp_fx[2 * i + 1] ), shl( DFT_in_fx_e, 1 ), &nrg_fx_e );
+        }
+    }
+    ELSE IF( NE_32( fac_fx, MAX_32 ) ) /* fac_fx was derived from the NFFT ratio: scale while copying */
+    {
+        /*Copy and scale*/
+        tmp_fx[0] = Mpy_32_32( pDFT_in_fx[0], fac_fx );
+        move32();
+        tmp_fx[1] = 0;
+        move32();
+        FOR( i = 2; i < NFFT; i++ )
+        {
+            tmp_fx[i] = Mpy_32_32( pDFT_in_fx[i], fac_fx );
+            move32();
+        }
+
+        IF( LT_32( fac_fx, MAX_32 ) ) /* presumably always true in this branch, assuming MAX_32 is the largest Word32 - TODO confirm */
+        {
+            tmp_fx[1] = 0; /*Nyquist is set to 0*/
+            tmp_fx[0] = 0; /*DC is set to 0*/
+            move32();
+            move32();
+        }
+        scal_fac = sub( L_norm_arr( tmp_fx, STEREO_DFT_N_MAX_ENC ), 1 );
+        scale_sig32( tmp_fx, STEREO_DFT_N_MAX_ENC, scal_fac ); // not a fixed Q here: the shift is tracked in DFT_in_fx_e on the next line
+        DFT_in_fx_e = sub( DFT_in_fx_e, scal_fac );
+    }
+    ELSE
+    {
+        FOR( i = 0; i < NFFT; i++ )
+        {
+            tmp_fx[i] = pDFT_in_fx[i];
+            move32();
+        }
+        scal_fac = sub( L_norm_arr( tmp_fx, STEREO_DFT_N_MAX_ENC ), 1 );
+        scale_sig32( tmp_fx, STEREO_DFT_N_MAX_ENC, scal_fac ); // not a fixed Q here: the shift is tracked in DFT_in_fx_e on the next line
+        DFT_in_fx_e = sub( DFT_in_fx_e, scal_fac );
+    }
+
+    /*Reconstruct */
+    /*IFFT*/
+    scal_fac = sub( 1, find_guarded_bits_fx( NFFT ) );
+    scale_sig32( tmp_fx, NFFT, scal_fac ); // exponent is now DFT_in_fx_e - scal_fac (undone after the transform)
+    rfft_fx( tmp_fx, trigo_enc_fx, NFFT, +1 );
+    Scale_sig32( tmp_fx, NFFT, sub( sub( DFT_in_fx_e, scal_fac ), 15 ) ); // shift by ( exponent - 15 ): result is Q16
+
+    FOR( i = 0; i < ovl; i++ ) /* overlap-add with the windowed tail kept in mem_fx, then store this frame's tail */
+    {
+        // output[offset + i] = mem[i] + tmp[zp + i] * win[i];
+        output_fx[offset + i] = L_add_sat( mem_fx[i], Mpy_32_32( tmp_fx[zp + i], win_fx[i] ) );
+        move32();
+        // mem[i] = tmp[zp + N + i] * win[ovl - 1 - i];
+        mem_fx[i] = Mpy_32_32( tmp_fx[zp + N + i], win_fx[ovl - 1 - i] );
+        move32();
+    }
+
+    /*Middle->Copy*/
+    FOR( i = 0; i < N - ovl; i++ )
+    {
+        // output_fx[offset + ovl + i] = tmp[zp + ovl + i];
+        output_fx[offset + ovl + i] = tmp_fx[zp + ovl + i];
+        move32();
+    }
+
+    /*-----------------------------------------------------------------*
+     * Lookahead: redress signal
+     *-----------------------------------------------------------------*/
+
+    FOR( i = 0; i < ovl; i++ )
+    {
+        Word32 ifft_deviation = tmp_fx[zp + N + ovl]; /* loop-invariant: the sample right after the lookahead segment */
+        move32();
+        // output[offset + N + i] = ( tmp[zp + N + i] - ifft_deviation ) / win_ana[ovl - 1 - i] + ifft_deviation;
+        Word16 L_temp_e;
+        Word32 L_temp = BASOP_Util_Divide3232_Scale_cadence( L_sub_sat( tmp_fx[zp + N + i], ifft_deviation ), win_ana_fx[ovl - 1 - i], &L_temp_e );
+        L_temp = L_shl_sat( L_temp, L_temp_e );
+        output_fx[offset + N + i] = L_add_sat( L_temp, ifft_deviation ); // Q16
+        move32();
+    }
+
+    *output_start_index = offset;
+    move16();
+    *output_end_index = add( add( offset, ovl ), N );
+    move16();
+    IF( nrg_out_fx_e )
+    {
+        *nrg_out_fx_e = nrg_fx_e;
+        move16();
+    }
+    pop_wmops();
+    return ( nrg_fx );
+}
+#endif
 float stereo_dft_enc_synthesize(
     STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle       */
     float *output,                         /* o  : output synthesis            */
diff --git a/lib_enc/swb_pre_proc.c b/lib_enc/swb_pre_proc.c
index 91dc8beea..3336ee336 100644
--- a/lib_enc/swb_pre_proc.c
+++ b/lib_enc/swb_pre_proc.c
@@ -44,6 +44,10 @@
 #include "wmc_auto.h"
 #include "ivas_prot.h"
 #include "ivas_rom_enc.h"
+#ifdef IVAS_FLOAT_FIXED
+#include "ivas_prot_fx.h"
+#include "prot_fx.h"
+#endif
 
 /*-------------------------------------------------------------------*
  * Local constants
@@ -270,6 +274,550 @@ void wb_pre_proc(
  * - Common SWB TBE and SWB BWE pre-processing
  *-------------------------------------------------------------------*/
 
+#ifdef IVAS_FLOAT_FIXED
+/*full implementation pending*/
+void swb_pre_proc_ivas_fx(
+    Encoder_State *st,                                         /* i/o: encoder state structure                 */
+    float *new_swb_speech,                                     /* o  : original input signal at 32kHz          */
+    Word32 *new_swb_speech_fx,                                 /* o  : original input signal at 32kHz          */
+    float *shb_speech,                                         /* o  : SHB target signal (6-14kHz) at 16kHz    */
+    float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i  : real buffer                             */
+    float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i  : imag buffer                             */
+    CPE_ENC_HANDLE hCPE                                        /* i/o: CPE encoder structure                   */
+)
+{
+    int16_t Sample_Delay_SWB_BWE, inner_frame, delay;
+    TD_BWE_ENC_HANDLE hBWE_TD;
+    FD_BWE_ENC_HANDLE hBWE_FD;
+    int32_t inner_Fs, input_Fs;
+    float old_input[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k];
+    Word32 old_input_fx[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k]; // fixed counterpart
+    float spchTmp[L_FRAME32k], spchTmp2[L_FRAME32k];
+    int16_t i, j, L_resamp;
+    int16_t startB, endB;
+    float *realBufferFlipped[CLDFB_NO_COL_MAX];
+    float *imagBufferFlipped[CLDFB_NO_COL_MAX];
+    float realBufferTmp[CLDFB_NO_COL_MAX][20];
+    float imagBufferTmp[CLDFB_NO_COL_MAX][20];
+    int16_t ts, nB, uB;
+    float sign, lbEner, v, t, regression;
+    const float *thr, *regV;
+    int16_t Sample_Delay_SWB_BWE32k, lMemRecalc32k, dft_ovl32k;
+
+    lMemRecalc32k = NS2SA( 32000, L_MEM_RECALC_NS );
+
+    /* initialization */
+    hBWE_TD = st->hBWE_TD;
+    hBWE_FD = st->hBWE_FD;
+    input_Fs = st->input_Fs;
+
+    for ( j = 0; j < CLDFB_NO_COL_MAX; j++ )
+    {
+        set_f( realBufferTmp[j], 0, 20 );
+        set_f( imagBufferTmp[j], 0, 20 );
+        realBufferFlipped[j] = realBufferTmp[j];
+        imagBufferFlipped[j] = imagBufferTmp[j];
+    }
+
+    set_f( old_input, 0.0f, NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k );
+
+    if ( input_Fs == 32000 )
+    {
+        if ( st->element_mode > EVS_MONO )
+        {
+            Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS );
+            if ( st->L_frame == L_FRAME16k )
+            {
+                Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS );
+            }
+
+            mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
+            mvr2r( st->input - L_FRAME32k, hBWE_FD->old_fdbwe_speech, L_FRAME32k );
+
+            if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB )
+            {
+                mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE );
+            }
+            else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB )
+            {
+                mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
+            }
+        }
+
+        mvr2r( st->input, new_swb_speech, L_FRAME32k );
+
+        if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE && st->extl != SWB_BWE_HIGHRATE )
+        {
+            Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS );
+            if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k )
+            {
+                Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS );
+            }
+            if ( st->element_mode > EVS_MONO )
+            {
+                Sample_Delay_SWB_BWE -= NS2SA( 32000, DELAY_FIR_RESAMPL_NS );
+            }
+
+            mvr2r( hBWE_FD->old_fdbwe_speech, &old_input[Sample_Delay_SWB_BWE], L_FRAME32k );
+
+            set_f( old_input, 0, Sample_Delay_SWB_BWE );
+            mvr2r( hBWE_FD->old_fdbwe_speech + L_FRAME32k - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
+            if ( st->extl != WB_BWE )
+            {
+                mvr2r( old_input, hBWE_FD->old_wtda_swb, L_FRAME32k );
+            }
+        }
+
+        if ( st->extl != SWB_BWE && st->extl != FB_BWE )
+        {
+            mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME32k );
+        }
+    }
+    else /* 48 kHz */
+    {
+
+        Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS );
+        Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS );
+        if ( st->L_frame == L_FRAME16k )
+        {
+            Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS );
+            Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_16k_NS );
+        }
+
+        dft_ovl32k = 0;
+        if ( st->element_mode == IVAS_CPE_DFT )
+        {
+            dft_ovl32k = (int16_t) ( STEREO_DFT_OVL_MAX * 32000 / 48000 );
+        }
+
+        if ( st->codec_mode == MODE1 )
+        {
+            if ( st->element_mode > EVS_MONO )
+            {
+
+                if ( st->element_mode == IVAS_CPE_TD )
+                {
+                }
+                else if ( st->bwidth == FB )
+                {
+                    mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
+                }
+
+                mvr2r( st->input - L_FRAME48k, hBWE_FD->old_fdbwe_speech, L_FRAME48k );
+
+                if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB )
+                {
+                    if ( st->bwidth == SWB )
+                    {
+                        /* buffers hBWE_FD->old_input[] and hBWE_FD->old_wtda_swb[] need to be at 32 kHz (inner) sampling rate */
+
+                        decimate_2_over_3_allpass( st->input - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, spchTmp, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
+
+                        mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( lMemRecalc32k - Sample_Delay_SWB_BWE32k ), lMemRecalc32k - Sample_Delay_SWB_BWE32k );
+                        mvr2r( spchTmp + lMemRecalc32k - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k );
+                    }
+                    else /* FB_BWE */
+                    {
+                        mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE );
+                        mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
+                    }
+                }
+                else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB )
+                {
+                    if ( st->bwidth == SWB )
+                    {
+                        lerp_flt( st->input - hCPE->hStereoDft->dft_ovl, spchTmp, dft_ovl32k - Sample_Delay_SWB_BWE32k, hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
+
+                        mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( dft_ovl32k - Sample_Delay_SWB_BWE32k ), dft_ovl32k - Sample_Delay_SWB_BWE32k );
+                    }
+                    else
+                    {
+                        mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
+                    }
+                }
+            }
+
+            if ( ( st->extl != SWB_BWE && st->extl != FB_BWE && st->core == ACELP_CORE ) || ( st->element_mode == IVAS_CPE_DFT && st->core != ACELP_CORE ) /*resampling not needed for MDCT cores*/ )
+            {
+                /* move the resampling out of the TDBWE path as new_swb_speech is not needed for TDBWE. */
+                mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME48k );
+            }
+            else
+            {
+                if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE )
+                {
+                    /* resample 48 kHz to 32kHz */
+                    if ( st->last_bwidth == FB )
+                    {
+                        inner_frame = L_FRAME48k;
+                        inner_Fs = 48000;
+                        mvr2r( hBWE_FD->old_fdbwe_speech, new_swb_speech, L_FRAME48k );
+                    }
+                    else
+                    {
+                        inner_frame = L_FRAME32k;
+                        inner_Fs = 32000;
+
+                        if ( st->element_mode != IVAS_CPE_DFT )
+                        {
+                            decimate_2_over_3_allpass( hBWE_FD->old_fdbwe_speech, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
+                        }
+                        else
+                        {
+                            lerp_flt( hBWE_FD->old_fdbwe_speech, new_swb_speech, inner_frame, L_FRAME48k );
+                        }
+
+                        if ( st->element_mode == IVAS_CPE_DFT && st->idchan == 0 )
+                        {
+                            for ( i = 0; i < STEREO_DFT_OVL_32k; i++ )
+                            {
+                                hCPE->hStereoDft->output_mem_dmx_32k[i] = new_swb_speech[inner_frame - STEREO_DFT_OVL_32k + i] * hCPE->hStereoDft->win_32k[STEREO_DFT_OVL_32k - 1 - i];
+                            }
+                        }
+                    }
+
+                    Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS );
+                    if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k )
+                    {
+                        Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS );
+                    }
+                    if ( st->element_mode > EVS_MONO )
+                    {
+                        Sample_Delay_SWB_BWE -= NS2SA( inner_Fs, DELAY_FIR_RESAMPL_NS );
+                    }
+
+                    mvr2r( new_swb_speech, &old_input[Sample_Delay_SWB_BWE], inner_frame );
+                    set_f( old_input, 0, Sample_Delay_SWB_BWE );
+                    mvr2r( new_swb_speech + inner_frame - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
+                    mvr2r( old_input, hBWE_FD->old_wtda_swb, inner_frame );
+                }
+
+                /* resample 48 kHz to 32kHz */
+                if ( st->bwidth == FB )
+                {
+                    mvr2r( st->input, new_swb_speech, L_FRAME48k );
+                }
+                else
+                {
+                    if ( st->element_mode == IVAS_CPE_TD )
+                    {
+                        float dec_2_over_3_mem_tmp[L_FILT_2OVER3], dec_2_over_3_mem_lp_tmp[L_FILT_2OVER3_LP];
+
+                        decimate_2_over_3_allpass( st->input, L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
+
+                        mvr2r( hBWE_TD->dec_2_over_3_mem, dec_2_over_3_mem_tmp, L_FILT_2OVER3 );
+                        mvr2r( hBWE_TD->dec_2_over_3_mem_lp, dec_2_over_3_mem_lp_tmp, L_FILT_2OVER3_LP );
+
+                        decimate_2_over_3_allpass( st->input + L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, new_swb_speech + L_FRAME32k - lMemRecalc32k, dec_2_over_3_mem_tmp, dec_2_over_3_mem_lp_tmp );
+                    }
+                    else if ( st->element_mode != IVAS_CPE_DFT )
+                    {
+                        decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
+                    }
+                    else /* IVAS_CPE_DFT */
+                    {
+                        /*flt2fix*/
+                        f2me_buf( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], &hCPE->hStereoDft->DFT_fx_e[0], STEREO_DFT_N_MAX_ENC );
+                        f2me_buf( hCPE->hStereoDft->DFT[1], hCPE->hStereoDft->DFT_fx[1], &hCPE->hStereoDft->DFT_fx_e[1], STEREO_DFT_N_MAX_ENC );
+                        f2me( hCPE->hStereoDft->icbweRefEner, &hCPE->hStereoDft->icbweRefEner_fx, &hCPE->hStereoDft->icbweRefEner_fx_e );
+                        f2me( hCPE->hStereoDft->lbEner, &hCPE->hStereoDft->lbEner_fx, &hCPE->hStereoDft->lbEner_fx_e );
+                        floatToFixed_arrL( hCPE->hStereoDft->output_mem_res_8k, hCPE->hStereoDft->output_mem_res_8k_fx, 16, STEREO_DFT_OVL_8k );
+                        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx, hCPE->hStereoDft->output_mem_dmx_fx, 16, STEREO_DFT_OVL_MAX );
+                        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_12k8, hCPE->hStereoDft->output_mem_dmx_12k8_fx, 16, STEREO_DFT_OVL_12k8 );
+                        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k, hCPE->hStereoDft->output_mem_dmx_16k_fx, 16, STEREO_DFT_OVL_16k );
+                        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb, hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, 16, STEREO_DFT_OVL_16k );
+                        floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_32k, hCPE->hStereoDft->output_mem_dmx_32k_fx, 16, STEREO_DFT_OVL_32k );
+                        /*flt2fix end*/
+
+                        Word16 out_start_ind, out_end_ind;
+                        stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, new_swb_speech_fx, &out_start_ind, &out_end_ind, st->idchan, input_Fs, 32000, 0, NULL );
+
+                        /*fix2flt*/
+                        hCPE->hStereoDft->icbweRefEner = me2f( hCPE->hStereoDft->icbweRefEner_fx, hCPE->hStereoDft->icbweRefEner_fx_e );
+                        hCPE->hStereoDft->lbEner = me2f( hCPE->hStereoDft->lbEner_fx, hCPE->hStereoDft->lbEner_fx_e );
+                        fixedToFloat_arrL( new_swb_speech_fx + out_start_ind, new_swb_speech + out_start_ind, 16, out_end_ind - out_start_ind );
+                        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_res_8k_fx, hCPE->hStereoDft->output_mem_res_8k, 16, STEREO_DFT_OVL_8k );
+                        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_fx, hCPE->hStereoDft->output_mem_dmx, 16, STEREO_DFT_OVL_MAX );
+                        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_12k8_fx, hCPE->hStereoDft->output_mem_dmx_12k8, 16, STEREO_DFT_OVL_12k8 );
+                        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_fx, hCPE->hStereoDft->output_mem_dmx_16k, 16, STEREO_DFT_OVL_16k );
+                        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, hCPE->hStereoDft->output_mem_dmx_16k_shb, 16, STEREO_DFT_OVL_16k );
+                        fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_32k_fx, hCPE->hStereoDft->output_mem_dmx_32k, 16, STEREO_DFT_OVL_32k );
+                        /*fix2flt end*/
+
+                        mvr2r( new_swb_speech - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k );
+                    }
+                }
+            }
+        }
+        else
+        {
+            /* resample 48 kHz to 32kHz */
+            if ( st->bwidth == FB )
+            {
+                mvr2r( st->input, new_swb_speech, L_FRAME48k );
+            }
+            else
+            {
+                decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
+            }
+        }
+    }
+
+    if ( ( st->core == ACELP_CORE && st->extl != SWB_BWE_HIGHRATE && st->extl != FB_BWE_HIGHRATE ) ||
+         ( ( st->total_brate == ACELP_9k60 || st->rf_mode ) && st->bwidth == SWB && st->element_mode == EVS_MONO ) )
+    {
+        float CldfbHB = 0;
+        Word32 CldfbHB_fx = 0;   // fixed counterpart
+        Word16 CldfbHB_fx_e = 0; // fixed counterpart
+
+        if ( st->element_mode == IVAS_CPE_DFT )
+        {
+
+            /*flt2fix*/
+            f2me_buf( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], &hCPE->hStereoDft->DFT_fx_e[0], STEREO_DFT_N_MAX_ENC );
+            f2me_buf( hCPE->hStereoDft->DFT[1], hCPE->hStereoDft->DFT_fx[1], &hCPE->hStereoDft->DFT_fx_e[1], STEREO_DFT_N_MAX_ENC );
+            f2me( hCPE->hStereoDft->icbweRefEner, &hCPE->hStereoDft->icbweRefEner_fx, &hCPE->hStereoDft->icbweRefEner_fx_e );
+            f2me( hCPE->hStereoDft->lbEner, &hCPE->hStereoDft->lbEner_fx, &hCPE->hStereoDft->lbEner_fx_e );
+            floatToFixed_arrL( hCPE->hStereoDft->output_mem_res_8k, hCPE->hStereoDft->output_mem_res_8k_fx, 16, STEREO_DFT_OVL_8k );
+            floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx, hCPE->hStereoDft->output_mem_dmx_fx, 16, STEREO_DFT_OVL_MAX );
+            floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_12k8, hCPE->hStereoDft->output_mem_dmx_12k8_fx, 16, STEREO_DFT_OVL_12k8 );
+            floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k, hCPE->hStereoDft->output_mem_dmx_16k_fx, 16, STEREO_DFT_OVL_16k );
+            floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb, hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, 16, STEREO_DFT_OVL_16k );
+            floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_32k, hCPE->hStereoDft->output_mem_dmx_32k_fx, 16, STEREO_DFT_OVL_32k );
+            /*flt2fix end*/
+
+            Word16 out_start_ind, out_end_ind;
+            CldfbHB_fx = stereo_dft_enc_synthesize_fx( hCPE->hStereoDft, old_input_fx + STEREO_DFT_OVL_16k, &out_start_ind, &out_end_ind, st->idchan, input_Fs, 16000, st->L_frame, &CldfbHB_fx_e );
+
+            /*fix2flt*/
+            CldfbHB = me2f( CldfbHB_fx, CldfbHB_fx_e );
+            hCPE->hStereoDft->icbweRefEner = me2f( hCPE->hStereoDft->icbweRefEner_fx, hCPE->hStereoDft->icbweRefEner_fx_e );
+            hCPE->hStereoDft->lbEner = me2f( hCPE->hStereoDft->lbEner_fx, hCPE->hStereoDft->lbEner_fx_e );
+            fixedToFloat_arrL( old_input_fx + STEREO_DFT_OVL_16k + out_start_ind, old_input + STEREO_DFT_OVL_16k + out_start_ind, 16, out_end_ind - out_start_ind );
+            fixedToFloat_arrL( hCPE->hStereoDft->output_mem_res_8k_fx, hCPE->hStereoDft->output_mem_res_8k, 16, STEREO_DFT_OVL_8k );
+            fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_fx, hCPE->hStereoDft->output_mem_dmx, 16, STEREO_DFT_OVL_MAX );
+            fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_12k8_fx, hCPE->hStereoDft->output_mem_dmx_12k8, 16, STEREO_DFT_OVL_12k8 );
+            fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_fx, hCPE->hStereoDft->output_mem_dmx_16k, 16, STEREO_DFT_OVL_16k );
+            fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, hCPE->hStereoDft->output_mem_dmx_16k_shb, 16, STEREO_DFT_OVL_16k );
+            fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_32k_fx, hCPE->hStereoDft->output_mem_dmx_32k, 16, STEREO_DFT_OVL_32k );
+            /*fix2flt end*/
+
+            /* delay corresponding to CLDFB delay */
+            mvr2r( old_input + STEREO_DFT_OVL_16k - 20, shb_speech, L_FRAME16k );
+            mvr2r( old_input, hBWE_TD->old_speech_shb + L_LOOK_16k + L_SUBFR16k - ( STEREO_DFT_OVL_16k - 20 ), STEREO_DFT_OVL_16k - 20 );
+            mvr2r( old_input, hCPE->hStereoICBWE->mem_shb_speech_ref, STEREO_DFT_OVL_16k - 20 );
+
+            if ( CldfbHB <= 0 )
+            {
+                CldfbHB = 1.0f;
+            }
+            hBWE_TD->cldfbHBLT_flt = 0.9f * hBWE_TD->cldfbHBLT_flt + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) );
+
+            lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner );
+            hCPE->hStereoICBWE->icbweRefEner = 0.05f * (float) sqrt( hCPE->hStereoDft->icbweRefEner );
+            lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner );
+            thr = icbwe_thr_DFT;
+            regV = icbwe_regressionValuesDFT;
+        }
+        else
+        {
+            if ( st->L_frame == L_FRAME )
+            {
+                startB = 34;
+                endB = 14;
+                for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
+                {
+                    for ( nB = startB, uB = 0; nB > endB; nB--, uB++ )
+                    {
+                        sign = ( ts % 2 ) ? 1.0f : -1.0f;
+                        realBufferFlipped[ts][uB] = -sign * realBuffer[ts][nB];
+                        imagBufferFlipped[ts][uB] = sign * imagBuffer[ts][nB];
+                    }
+                }
+            }
+            else
+            {
+                startB = 39;
+                endB = 19;
+                for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
+                {
+                    for ( nB = startB, uB = 0; nB > endB; nB--, uB++ )
+                    {
+                        realBufferFlipped[ts][uB] = -realBuffer[ts][nB];
+                        imagBufferFlipped[ts][uB] = imagBuffer[ts][nB];
+                    }
+                }
+            }
+
+            for ( nB = 0; nB < 10; nB++ )
+            {
+                for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
+                {
+                    CldfbHB += ( realBufferFlipped[ts][nB] * realBufferFlipped[ts][nB] + imagBufferFlipped[ts][nB] * imagBufferFlipped[ts][nB] );
+                }
+            }
+            if ( CldfbHB <= 0 )
+            {
+                CldfbHB = 1.0f;
+            }
+            hBWE_TD->cldfbHBLT_flt = 0.9f * hBWE_TD->cldfbHBLT_flt + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) );
+
+            if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL )
+            {
+                hCPE->hStereoICBWE->icbweRefEner = EPSILON;
+                for ( nB = 20; nB < 40; nB++ )
+                {
+                    for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
+                    {
+                        hCPE->hStereoICBWE->icbweRefEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] );
+                    }
+                }
+                hCPE->hStereoICBWE->icbweRefEner = 0.05f * sqrtf( hCPE->hStereoICBWE->icbweRefEner );
+            }
+
+            lbEner = EPSILON;
+            for ( nB = 0; nB < 20; nB++ )
+            {
+                for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
+                {
+                    lbEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] );
+                }
+            }
+            lbEner = 0.05f * sqrtf( lbEner );
+            thr = icbwe_thr_TDM;
+            regV = icbwe_regressionValuesTDM;
+
+            cldfbSynthesis_ivas( realBufferFlipped, imagBufferFlipped, shb_speech, -1, st->cldfbSynTd );
+        }
+
+        if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL )
+        {
+            hCPE->hStereoICBWE->MSFlag = 0; /* Init the multi-source flag */
+            v = 0.3333f * sum_f( st->voicing, 3 );
+            t = log10f( ( hCPE->hStereoICBWE->icbweRefEner + 1e-6f ) / ( lbEner + 1e-6f ) );
+
+            /* Three Level Decision Tree to calculate a regression value first */
+            if ( t < thr[0] ) /* level 1 */
+            {
+                if ( t < thr[1] ) /* level 2 */
+                {
+                    regression = ( v < thr[3] ) ? regV[0] : regV[1]; /* level 3 */
+                }
+                else
+                {
+                    regression = ( v < thr[4] ) ? regV[2] : regV[3]; /* level 3 */
+                }
+            }
+            else
+            {
+                if ( t < thr[2] ) /* level 2 */
+                {
+                    regression = ( v < thr[5] ) ? regV[4] : regV[5]; /* level 3 */
+                }
+                else
+                {
+                    regression = ( v < thr[6] ) ? regV[6] : regV[7]; /* level 3 */
+                }
+            }
+
+            /* Convert the regression to a hard decision (classification) */
+            if ( regression > 0.79f && !( st->bwidth < SWB || hCPE->hCoreCoder[0]->vad_flag == 0 ) )
+            {
+                hCPE->hStereoICBWE->MSFlag = 1;
+            }
+        }
+
+        if ( st->extl != WB_TBE && st->extl != SWB_TBE && st->extl != FB_TBE )
+        {
+            /* Update the previous superwideband speech buffer in case of a SWB_BWE frame - this code is in swb_tbe_enc */
+            delay = L_LOOK_16k + L_SUBFR16k;
+            mvr2r( shb_speech + L_FRAME16k - delay, hBWE_TD->old_speech_shb, delay );
+        }
+    }
+    else
+    {
+        if ( ( st->bwidth == FB || st->core == ACELP_CORE ) && ( st->element_mode == EVS_MONO ) )
+        {
+            InitSWBencBufferStates( st->hBWE_TD, shb_speech );
+        }
+        else
+        {
+            if ( st->element_mode == IVAS_CPE_DFT )
+            {
+                if ( st->L_frame == L_FRAME )
+                {
+                    L_resamp = 560; /* 6.4 kHz core -> 6 - 14 kHz SHB target. 20 ms is 560 samples in 28 kHz sample rate */
+                }
+                else
+                {
+                    L_resamp = 620; /* 8 kHz core -> 7.5 - 15.5 kHz SHB target. 20 ms is 620 samples in 31 kHz sample rate */
+                }
+
+                /* Dirty downsampling to match Nyquist to upper frequency limit of target */
+                lerp_flt( st->input, new_swb_speech, L_resamp, (int16_t) ( input_Fs / 50 ) );
+
+                /* flip the spectrum */
+                mvr2r( new_swb_speech, spchTmp, L_resamp );
+                for ( i = 0; i < L_resamp; i = i + 2 )
+                {
+                    spchTmp[i] = -spchTmp[i];
+                }
+
+                /* Dirty upsampling to match Nyquist/2 to lower frequency limit of target (reversed spectrum)*/
+                lerp_flt( spchTmp, spchTmp2, L_FRAME32k, L_resamp );
+                mvr2r( spchTmp2, spchTmp, L_FRAME32k );
+            }
+            else
+            {
+                /* flip the spectrum */
+                mvr2r( new_swb_speech, spchTmp, L_FRAME32k );
+
+                for ( i = 0; i < L_FRAME32k; i = i + 2 )
+                {
+                    spchTmp[i] = -spchTmp[i];
+                }
+            }
+
+            Decimate_allpass_steep( spchTmp, hBWE_TD->state_ana_filt_shb, L_FRAME32k, shb_speech );
+
+            mvr2r( shb_speech + L_FRAME16k - ( L_LOOK_16k + L_SUBFR16k ), hBWE_TD->old_speech_shb, L_LOOK_16k + L_SUBFR16k );
+
+            /*Compute the past overlap for potential next iDFTs SHB*/
+            if ( st->element_mode == IVAS_CPE_DFT )
+            {
+                for ( i = 0; i < STEREO_DFT_OVL_16k; i++ )
+                {
+                    hCPE->hStereoDft->output_mem_dmx_16k_shb[i] = shb_speech[20 + i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i];
+                }
+            }
+        }
+
+        if ( st->element_mode != IVAS_CPE_DFT )
+        {
+            /* Reset CLDFB synthesis buffer */
+            set_f( st->cldfbSynTd->cldfb_state, 0.0f, st->cldfbSynTd->p_filter_length );
+        }
+        else
+        {
+            hCPE->hStereoDft->flip_sign = -hCPE->hStereoDft->flip_sign; /* Make sure sign is updated even if DFT SHB target is not generated */
+        }
+    }
+
+    /* Memory reset to compensate for 0.9375 ms offset when transitioning from IO to SWB */
+    /* When switching from n > 1 to n = 1, the enc/dec delay is kept at 8.75/3.25 ms and the code below is not needed;
+       only when starting with n = 1 is the delay 9.6875/2.3125 ms, in which case this reset is needed for IO->BWE. */
+    if ( st->last_extl == -1 && st->element_mode == EVS_MONO )
+    {
+        delay = NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS );
+        for ( i = 0; i < delay; i++ )
+        {
+            shb_speech[i] = (float) i * ( 0.03f * shb_speech[2 * delay - 1 - i] );
+        }
+    }
+
+    return;
+}
+#endif
 void swb_pre_proc(
     Encoder_State *st,                                         /* i/o: encoder state structure                 */
     float *new_swb_speech,                                     /* o  : original input signal at 32kHz          */
-- 
GitLab


From 1c69d8b9b63d798c4af87d88aa437727fe106200 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Fri, 19 Jul 2024 14:45:39 +0530
Subject: [PATCH 2/2] Clang formatting changes

---
 lib_enc/ivas_core_enc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c
index 0d39208e7..37b321bb5 100644
--- a/lib_enc/ivas_core_enc.c
+++ b/lib_enc/ivas_core_enc.c
@@ -862,4 +862,4 @@ ivas_error ivas_core_enc(
 
     return error;
 }
-#endif
\ No newline at end of file
+#endif
-- 
GitLab