Commit ff64fead authored by Sandesh Venkatesh
Browse files

stereo_dft_enc_synthesize_fx implementation

parent a3f8d010
Loading
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -7111,6 +7111,12 @@ void rfft_fx(
            s2 = -204;
            move16();
            BREAK;
        case 256:
            s1 = 128;
            move16();
            s2 = -128;
            move16();
            BREAK;
        case 320:
            s1 = 102;
            move16();
@@ -7123,6 +7129,18 @@ void rfft_fx(
            s2 = -68;
            move16();
            BREAK;
        case 640:
            s1 = 51;
            move16();
            s2 = -51;
            move16();
            BREAK;
        case 960:
            s1 = 34;
            move16();
            s2 = -34;
            move16();
            BREAK;
        default:
            s1 = -1;
            move16();
+14 −0
Original line number Diff line number Diff line
@@ -1123,6 +1123,20 @@ void stereo_dft_dec_fx(
    const Word16 num_md_sub_frames                /* i  : number of MD subframes             */
);

// ivas_stereo_dft_enc.c
#ifdef IVAS_FLOAT_FIXED
/* Fixed-point variant of the DFT stereo encoder synthesis.
 * The float `output` argument of the original interface is replaced by the
 * Word32 buffer `output_fx`; three extra pointer arguments are added.
 * NOTE(review): the meaning of the Word32 return value is not visible from this
 * declaration - presumably an output energy whose exponent is returned through
 * nrg_out_fx_e; confirm against the definition. */
Word32 stereo_dft_enc_synthesize_fx(
    STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle       */
    // float *output,                         /* o  : output synthesis            */
    Word32 *output_fx, /* o  : output synthesis           Q16 */
    Word16 *output_start_index, /* presumably o: first index written in output_fx - TODO confirm */
    Word16 *output_end_index,   /* presumably o: end index written in output_fx - TODO confirm   */
    const Word16 chan,                 /* i  : channel number              */
    const Word32 input_Fs,             /* i  : input sampling rate         */
    const Word32 output_sampling_rate, /* i  : output sampling rate        */
    const Word16 L_frame,              /* i  : frame length at internal Fs */
    Word16 *nrg_out_fx_e );            /* presumably o: exponent of the returned value - TODO confirm */
#endif

void ivas_ls_setup_conversion_fx(
    Decoder_Struct *st_ivas,   /* i  : IVAS decoder structure           */
+12 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@
#include "ivas_cnst.h"
#include "stat_enc.h"
#include "stat_dec.h"
#include "ivas_stat_enc.h"
#include "ivas_stat_dec.h"
#include "ivas_error.h"
#include "ivas_error_utils.h"
@@ -10068,4 +10069,15 @@ Word16 sr2fscale(
    const Word32 sr_core /* i  : internal sampling rate        */
);

// pre_proc functions
/*full implementation pending*/
/* Transitional SWB(FB) pre-processing entry point: same arguments as the float
 * routine it replaces at the call site (swb_pre_proc), plus a Word32 buffer
 * that receives the 32 kHz signal alongside the float one. */
void swb_pre_proc_ivas_fx(
    Encoder_State *st,                                         /* i/o: encoder state structure                 */
    float *new_swb_speech,                                     /* o  : original input signal at 32kHz          */
    Word32 *new_swb_speech_fx,                                 /* o  : original input signal at 32kHz, Word32 copy (Q format not visible here - TODO confirm) */
    float *shb_speech,                                         /* o  : SHB target signal (6-14kHz) at 16kHz    */
    float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i  : real buffer                             */
    float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i  : imag buffer                             */
    CPE_ENC_HANDLE hCPE                                        /* i/o: CPE encoder structure                   */
);
#endif
+409 −1
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include "wmc_auto.h"
#include <math.h>
#ifdef IVAS_FLOAT_FIXED
#include "prot_fx.h"
#include "ivas_prot_fx.h"
#endif

@@ -49,7 +50,7 @@
 *
 * Principal IVAS core coder routine, where number of core channels is 1 or 2
 *-------------------------------------------------------------------*/

#ifdef IVAS_FLOAT_FIXED
ivas_error ivas_core_enc(
    SCE_ENC_HANDLE hSCE,                                         /* i/o: SCE encoder structure                   */
    CPE_ENC_HANDLE hCPE,                                         /* i/o: CPE encoder structure                   */
@@ -90,6 +91,10 @@ ivas_error ivas_core_enc(
    float hb_speech[L_FRAME16k / 4];
    float *new_swb_speech;
    float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX];
#ifdef IVAS_FLOAT_FIXED
    Word32 *new_swb_speech_fx;
    Word32 new_swb_speech_buffer_fx[L_FRAME48k + STEREO_DFT_OVL_MAX];
#endif
    float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET];
    float voice_factors[CPE_CHANNELS][NB_SUBFR16k];
    int16_t Voicing_flag[CPE_CHANNELS];
@@ -363,11 +368,18 @@ ivas_error ivas_core_enc(
         *---------------------------------------------------------------------*/

        new_swb_speech = new_swb_speech_buffer + STEREO_DFT_OVL_MAX;
#ifdef IVAS_FLOAT_FIXED
        new_swb_speech_fx = new_swb_speech_buffer_fx + STEREO_DFT_OVL_MAX;
#endif

        if ( !st->Opt_SC_VBR && input_Fs >= 32000 && st->hBWE_TD != NULL )
        {
            /* Common pre-processing for SWB(FB) TBE and SWB(FB) BWE */
#ifdef IVAS_FLOAT_FIXED
            swb_pre_proc_ivas_fx( st, new_swb_speech, new_swb_speech_fx, shb_speech, realBuffer[n], imagBuffer[n], hCPE );
#else
            swb_pre_proc( st, new_swb_speech, shb_speech, realBuffer[n], imagBuffer[n], hCPE );
#endif
        }
        else if ( input_Fs >= 32000 )
        {
@@ -455,3 +467,399 @@ ivas_error ivas_core_enc(

    return error;
}
#else
/*-------------------------------------------------------------------*
 * ivas_core_enc()  (variant compiled when IVAS_FLOAT_FIXED is not defined)
 *
 * Core coder driver for one SCE or one CPE. In order:
 *   1. picks the channel states and bitrates from hSCE (if non-NULL) or hCPE
 *   2. runs pre_proc_ivas() on every core channel
 *   3. runs the combined-format bitrate sanity check (DFT stereo with surplus)
 *   4. per channel: signaling, core switching, ACELP / TCX / HQ encoding
 *   5. MDCT stereo: joint TCX encoding or stereo SID encoding
 *   6. per channel: post-processing, WB/SWB/FB bandwidth extension, updates
 *   7. pads the bitstream with the unused bits found in step 3
 *
 * Returns IVAS_ERR_OK, or the first error reported by pre_proc_ivas() or
 * acelp_core_enc().
 *
 * NOTE(review): both early `return error` paths leave the function after
 * push_wmops() without calling pop_wmops() - confirm this is acceptable.
 *-------------------------------------------------------------------*/
ivas_error ivas_core_enc(
    SCE_ENC_HANDLE hSCE,                                         /* i/o: SCE encoder structure                   */
    CPE_ENC_HANDLE hCPE,                                         /* i/o: CPE encoder structure                   */
    MCT_ENC_HANDLE hMCT,                                         /* i/o: MCT encoder structure                   */
    const int16_t n_CoreChannels,                                /* i  : number of core channels to be coded     */
    float old_inp_12k8[][L_INP_12k8],                            /* i  : buffer of old input signal              */
    float old_inp_16k[][L_INP],                                  /* i  : buffer of old input signal              */
    float ener[],                                                /* i  : residual energy from Levinson-Durbin    */
    float A[][NB_SUBFR16k * ( M + 1 )],                          /* i  : A(z) unquantized for the 4 subframes    */
    float Aw[][NB_SUBFR16k * ( M + 1 )],                         /* i  : weighted A(z) unquantized for subframes */
    float epsP[][M + 1],                                         /* i  : LP prediction errors                    */
    float lsp_new[][M],                                          /* i  : LSPs at the end of the frame            */
    float lsp_mid[][M],                                          /* i  : LSPs in the middle of the frame         */
    const int16_t vad_hover_flag[],                              /* i  : VAD hangover flag                       */
    int16_t attack_flag[],                                       /* i  : attack flag (GSC or TC)                 */
    float realBuffer[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer                             */
    float imagBuffer[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer                             */
    float old_wsp[][L_WSP],                                      /* i  : weighted input signal buffer            */
    const int16_t loc_harm[],                                    /* i  : harmonicity flag                        */
    const float cor_map_sum[],                                   /* i  : speech/music classif. parameter         */
    const int16_t vad_flag_dtx[],                                /* i  : HE-SAD flag with additional DTX HO      */
    float enerBuffer[][CLDFB_NO_CHANNELS_MAX],                   /* i  : energy buffer                           */
    float fft_buff[][2 * L_FFT],                                 /* i  : FFT buffer                              */
    const int16_t tdm_SM_or_LRTD_Pri,                            /* i  : channel combination scheme flag         */
    const int16_t ivas_format,                                   /* i  : IVAS format                             */
    const int16_t flag_16k_smc                                   /* i  : flag to indicate if the OL SMC is run at 16 kHz */
)
{
    int16_t n, input_frame;
    int16_t cpe_id, MCT_flag;
    Encoder_State **sts, *st;
    STEREO_ICBWE_ENC_HANDLE hStereoICBWE;
    STEREO_TD_ENC_DATA_HANDLE hStereoTD;
    float *inp[CPE_CHANNELS];
    float new_inp_resamp16k[CPE_CHANNELS][L_FRAME16k]; /* new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
    float old_syn_12k8_16k[CPE_CHANNELS][L_FRAME16k];  /* ACELP core synthesis at 12.8kHz or 16kHz to be used by the SWB BWE */
    float shb_speech[L_FRAME16k];
    float hb_speech[L_FRAME16k / 4];
    float *new_swb_speech;
    float new_swb_speech_buffer[L_FRAME48k + STEREO_DFT_OVL_MAX];
    float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET];
    float voice_factors[CPE_CHANNELS][NB_SUBFR16k];
    int16_t Voicing_flag[CPE_CHANNELS];
    float pitch_buf[CPE_CHANNELS][NB_SUBFR16k];
    int16_t unbits[CPE_CHANNELS];
    float tdm_lspQ_PCh[M], tdm_lsfQ_PCh[M];
    int16_t last_element_mode, tdm_Pitch_reuse_flag;
    int32_t element_brate, last_element_brate, input_Fs;
    int16_t diff_nBits;
    ivas_error error;
    int16_t max_num_indices_BWE;

    push_wmops( "ivas_core_enc" );

    error = IVAS_ERR_OK;

    /*------------------------------------------------------------------*
     * General initialization
     *-----------------------------------------------------------------*/

    if ( hSCE != NULL )
    {
        /* SCE case: no CPE, no MCT, no stereo handles */
        cpe_id = -1;
        MCT_flag = 0;
        sts = hSCE->hCoreCoder;
        hStereoTD = NULL;
        hStereoICBWE = NULL;
        element_brate = hSCE->element_brate;
        last_element_brate = hSCE->last_element_brate;
        last_element_mode = IVAS_SCE;
        tdm_Pitch_reuse_flag = -1;
    }
    else
    {
        /* CPE case: hCPE is dereferenced unconditionally, so it must be non-NULL whenever hSCE is NULL */
        cpe_id = hCPE->cpe_id;
        MCT_flag = 0;
        if ( hMCT != NULL )
        {
            MCT_flag = 1;
        }
        sts = hCPE->hCoreCoder;
        hStereoICBWE = hCPE->hStereoICBWE;
        element_brate = hCPE->element_brate;
        last_element_brate = hCPE->last_element_brate;
        last_element_mode = hCPE->last_element_mode;

        if ( hCPE->hStereoTD != NULL )
        {
            hStereoTD = hCPE->hStereoTD;
            tdm_Pitch_reuse_flag = hCPE->hStereoTD->tdm_Pitch_reuse_flag;
        }
        else
        {
            hStereoTD = NULL;
            tdm_Pitch_reuse_flag = -1;
        }
    }

    /* sampling rate and frame length are taken from the first channel for all channels */
    input_Fs = sts[0]->input_Fs;
    input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC );

    /* clear the whole SWB buffer, including the STEREO_DFT_OVL_MAX samples in front of new_swb_speech */
    set_f( new_swb_speech_buffer, 0, L_FRAME48k + STEREO_DFT_OVL_MAX );

    for ( n = 0; n < n_CoreChannels; n++ )
    {
        st = sts[n];

        /*------------------------------------------------------------------*
         * Initialization per core-coder channel
         *-----------------------------------------------------------------*/


        st->extl = -1;
        unbits[n] = 0;

        st->element_brate = element_brate;

        /*---------------------------------------------------------------------*
         * Pre-processing, incl. Decision matrix
         *---------------------------------------------------------------------*/

        /* for SBA_FORMAT the 4th argument is last_element_brate, otherwise it repeats element_brate */
        if ( ( error = pre_proc_ivas( st, last_element_mode, element_brate, ivas_format == SBA_FORMAT ? last_element_brate : element_brate, input_frame, old_inp_12k8[n], old_inp_16k[n], &inp[n], &ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], new_inp_resamp16k[n], &Voicing_flag[n], old_wsp[n], loc_harm[n], cor_map_sum[n], vad_flag_dtx[n], enerBuffer[n], fft_buff[n], MCT_flag, vad_hover_flag[n], flag_16k_smc ) ) != IVAS_ERR_OK )
        {
            return error;
        }

        if ( st->element_mode == IVAS_CPE_MDCT || st->element_mode == IVAS_SCE )
        {
            st->enablePlcWaveadjust = 0;
        }
    }

    /*------------------------------------------------------------------*
     * Sanity check in combined format coding
     *-----------------------------------------------------------------*/

    /* diff_nBits is consumed at the end of the function, under the same condition */
    diff_nBits = 0;
    if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 )
    {
        ivas_combined_format_brate_sanity( hCPE->element_brate, sts[0]->core, sts[0]->total_brate, &( sts[0]->core_brate ), &( sts[0]->inactive_coder_type_flag ), &diff_nBits );
    }

    /*---------------------------------------------------------------------*
     * Core Encoding
     *---------------------------------------------------------------------*/

    for ( n = 0; n < n_CoreChannels; n++ )
    {
        st = sts[n];

        /* update pointer to the buffer of indices of the second channel */
        if ( n == 1 && st->element_mode == IVAS_CPE_TD )
        {
            /* adjust the pointer to the buffer of indices of the secondary channel (make space for BWE indices) */
            max_num_indices_BWE = get_BWE_max_num_indices( sts[0]->extl_brate );
            st->hBstr->ind_list = sts[0]->hBstr->ind_list + sts[0]->hBstr->nb_ind_tot + max_num_indices_BWE;

            /* write TD stereo spatial parameters */
            /* NOTE(review): hStereoTD is dereferenced here without a NULL check - assumes it is always set in TD stereo mode */
            move_indices( hStereoTD->tdm_hBstr_tmp.ind_list, st->hBstr->ind_list, hStereoTD->tdm_hBstr_tmp.nb_ind_tot );
            st->hBstr->nb_ind_tot += hStereoTD->tdm_hBstr_tmp.nb_ind_tot;
            st->hBstr->nb_bits_tot += hStereoTD->tdm_hBstr_tmp.nb_bits_tot;

            reset_indices_enc( &hStereoTD->tdm_hBstr_tmp, MAX_IND_TDM_TMP );
        }

        /*---------------------------------------------------------------------*
         * Write signaling info into the bitstream
         *---------------------------------------------------------------------*/

        /* logically equivalent to ( !MCT_flag || cpe_id == 0 ): with MCT only the first CPE writes signaling here */
        if ( !MCT_flag || ( MCT_flag && cpe_id == 0 ) )
        {
            ivas_signaling_enc( st, MCT_flag, element_brate, tdm_SM_or_LRTD_Pri, tdm_Pitch_reuse_flag );
        }

        /*---------------------------------------------------------------------*
         * Preprocessing (preparing) for ACELP/HQ core switching
         *---------------------------------------------------------------------*/

        core_switching_pre_enc( st, old_inp_12k8[n], old_inp_16k[n], sts[0]->active_cnt, last_element_mode );

        /*---------------------------------------------------------------------*
         * ACELP core encoding
         * TCX core encoding
         * HQ core encoding
         *---------------------------------------------------------------------*/

        /* note: the three core encoders below receive vad_hover_flag[0] (first channel), not vad_hover_flag[n] */
        if ( st->core == ACELP_CORE )
        {
            /* ACELP core encoder */
            if ( ( error = acelp_core_enc( st, inp[n], ener[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], vad_hover_flag[0], attack_flag[n], bwe_exc_extended[n], voice_factors[n], old_syn_12k8_16k[n], pitch_buf[n], &unbits[n], hStereoTD, tdm_lsfQ_PCh ) ) != IVAS_ERR_OK )
            {
                return error;
            }
        }

        /* in MDCT stereo mode TCX is not run per channel here; it is handled jointly after this loop */
        if ( ( st->core == TCX_20_CORE || st->core == TCX_10_CORE ) && st->element_mode != IVAS_CPE_MDCT )
        {
            /* TCX core encoder */
            stereo_tcx_core_enc( st, old_inp_12k8[n] + L_INP_MEM, old_inp_16k[n] + L_INP_MEM, Aw[n], lsp_new[n], lsp_mid[n], pitch_buf[n], last_element_mode, vad_hover_flag[0] );
        }

        if ( st->core == HQ_CORE )
        {
            /* HQ core encoder */
            hq_core_enc( st, st->input, input_frame, NORMAL_HQ_CORE, Voicing_flag[n], vad_hover_flag[0] );
        }

        /*---------------------------------------------------------------------*
         * TD stereo updates
         *---------------------------------------------------------------------*/

        /* done after the first channel only; tdm_lsfQ_PCh is then passed to acelp_core_enc() for the next channel */
        if ( st->element_mode == IVAS_CPE_TD && n == 0 )
        {
            td_stereo_param_updt( st->lsp_old, st->lsf_old, pitch_buf[0], tdm_lspQ_PCh, tdm_lsfQ_PCh, hStereoTD->tdm_Pri_pitch_buf, st->flag_ACELP16k, hStereoTD->tdm_use_IAWB_Ave_lpc );
        }
    }


    /*---------------------------------------------------------------------*
     * MDCT stereo: joint TCX Core Encoding
     *---------------------------------------------------------------------*/

    if ( sts[0]->element_mode == IVAS_CPE_MDCT )
    {
        /* both channels above SID rate: regular joint encoding */
        if ( sts[0]->core_brate > SID_2k40 && sts[1]->core_brate > SID_2k40 )
        {
            if ( MCT_flag )
            {
                ivas_mdct_core_whitening_enc( hCPE, old_inp_16k, old_wsp, pitch_buf, hMCT->p_mdst_spectrum_long[cpe_id], hMCT->tnsBits[cpe_id], hMCT->p_orig_spectrum_long[cpe_id],
                                              hMCT->tnsSize[cpe_id], hMCT->p_param[cpe_id], hMCT->hBstr, 1, hMCT->nchan_out_woLFE );
            }
            else
            {
                stereo_mdct_core_enc( hCPE, old_inp_16k, old_wsp, pitch_buf );
            }
        }
        /* both channels exactly at SID rate: stereo SID encoding; any other combination does nothing here */
        else if ( sts[0]->core_brate == SID_2k40 && sts[1]->core_brate == SID_2k40 )
        {
            /* synch CNG configs between channels */
            for ( n = 0; n < CPE_CHANNELS; n++ )
            {
                st = sts[n];
                if ( ( st->hFdCngEnc->hFdCngCom->frameSize != st->L_frame ) || ( st->hFdCngEnc->hFdCngCom->CngBandwidth != st->bwidth ) )
                {
                    /* bandwidth passed on is at least WB */
                    configureFdCngEnc( st->hFdCngEnc, max( st->bwidth, WB ), st->L_frame == L_FRAME16k ? ACELP_16k40 : ACELP_9k60 );
                }
            }

            if ( sts[0]->cng_sba_flag )
            {
                FdCngEncodeDiracMDCTStereoSID( hCPE );
            }
            else
            {
                FdCngEncodeMDCTStereoSID( hCPE );
            }
        }
    }


    /*---------------------------------------------------------------------*
     * Postprocessing, BWEs and Updates
     *---------------------------------------------------------------------*/

    for ( n = 0; n < n_CoreChannels; n++ )
    {
        st = sts[n];

        /*---------------------------------------------------------------------*
         * Postprocessing for ACELP/HQ core switching
         *---------------------------------------------------------------------*/

        core_switching_post_enc( st, old_inp_12k8[n], old_inp_16k[n], A[n] );

        /*---------------------------------------------------------------------*
         * WB TBE encoding
         * WB BWE encoding
         *---------------------------------------------------------------------*/

        if ( input_Fs >= 16000 && st->bwidth < SWB && st->hBWE_TD != NULL )
        {
            /* Common pre-processing for WB TBE and WB BWE */
            wb_pre_proc( st, last_element_mode, new_inp_resamp16k[n], hb_speech );
        }

        if ( st->extl == WB_TBE )
        {
            /* WB TBE encoder */
            wb_tbe_enc( st, hb_speech, bwe_exc_extended[n], voice_factors[n], pitch_buf[n] );
        }
        else if ( st->extl == WB_BWE && n == 0 && st->element_mode != IVAS_CPE_MDCT )
        {
            /* WB BWE encoder */
            wb_bwe_enc( st, new_inp_resamp16k[n] );
        }

        /*---------------------------------------------------------------------*
         * SWB(FB) TBE encoding
         * SWB(FB) BWE encoding
         *---------------------------------------------------------------------*/

        /* working pointer skips the first STEREO_DFT_OVL_MAX samples of the buffer */
        new_swb_speech = new_swb_speech_buffer + STEREO_DFT_OVL_MAX;

        if ( !st->Opt_SC_VBR && input_Fs >= 32000 && st->hBWE_TD != NULL )
        {
            /* Common pre-processing for SWB(FB) TBE and SWB(FB) BWE */
            swb_pre_proc( st, new_swb_speech, shb_speech, realBuffer[n], imagBuffer[n], hCPE );
        }
        else if ( input_Fs >= 32000 )
        {
            if ( st->hBWE_TD != NULL )
            {
                InitSWBencBufferStates( st->hBWE_TD, shb_speech );
            }
        }

        /* SWB TBE encoder */
        if ( st->extl == SWB_TBE || st->extl == FB_TBE )
        {
            /* TBE is skipped for FRAME_NO_DATA and SID frames */
            if ( st->core_brate != FRAME_NO_DATA && st->core_brate != SID_2k40 )
            {
                float fb_exc[L_FRAME16k];

                swb_tbe_enc( st, hStereoICBWE, shb_speech, bwe_exc_extended[n], voice_factors[n], fb_exc, pitch_buf[n] );

                if ( st->extl == FB_TBE )
                {
                    /* FB TBE encoder */
                    fb_tbe_enc( st, st->input, fb_exc );
                }
            }
        }
        else if ( st->extl == SWB_BWE || st->extl == FB_BWE )
        {
            /* SWB(FB) BWE encoder */
            swb_bwe_enc( st, last_element_mode, old_inp_12k8[n], old_inp_16k[n], old_syn_12k8_16k[n], new_swb_speech, shb_speech );
        }

        /*---------------------------------------------------------------------*
         * SWB DTX/CNG encoding
         *---------------------------------------------------------------------*/

        if ( st->hTdCngEnc != NULL && st->Opt_DTX_ON && ( input_frame >= L_FRAME32k || st->element_mode == IVAS_CPE_DFT ) )
        {
            /* SHB DTX/CNG encoder */
            swb_CNG_enc( st, shb_speech, old_syn_12k8_16k[n] );
        }

        /*-------------------------------------------------------------------*
         *  Inter-channel BWE encoding
         *-------------------------------------------------------------------*/

        /* first channel only; the SWB buffer is passed from its start (not new_swb_speech) and reused as scratch */
        if ( n == 0 && input_Fs >= 32000 && hStereoICBWE != NULL )
        {
            stereo_icBWE_preproc( hCPE, input_frame, new_swb_speech_buffer /*tmp buffer*/ );

            stereo_icBWE_enc( hCPE, shb_speech, new_swb_speech_buffer, voice_factors[0] );
        }

        /*---------------------------------------------------------------------*
         * Channel-aware mode - write signaling information into the bitstream
         *---------------------------------------------------------------------*/

        signaling_enc_rf( st );

        /*---------------------------------------------------------------------*
         * Common updates
         *---------------------------------------------------------------------*/

        if ( !MCT_flag ) /* for MCT do this later, otherwise there can be a problem because TCX quant happens later and might get the wrong last_core on a bit rate switch */
        {
            updt_enc_common( st );
        }
    }

    /*------------------------------------------------------------------*
     * Write potentially unused bits in combined format coding
     *-----------------------------------------------------------------*/

    if ( hCPE != NULL && hCPE->element_mode == IVAS_CPE_DFT && hCPE->brate_surplus > 0 )
    {
        /* pad with zero-valued IND_UNUSED indices, at most 16 bits each, until diff_nBits is used up */
        while ( diff_nBits > 0 )
        {
            n = min( diff_nBits, 16 );
            push_indice( sts[0]->hBstr, IND_UNUSED, 0, n );
            diff_nBits -= n;
        }
    }


    pop_wmops();

    return error;
}
#endif
 No newline at end of file
+744 −4

File changed.

Preview size limit exceeded, changes collapsed.

Loading