Commit dce4d2e3 authored by Sandesh Venkatesh's avatar Sandesh Venkatesh
Browse files

Core coder and stereo path sub-funcs conversion and integration

[x] generate comfort noise
[x] stereo_tdm_ener_analysis
[x] pre_proc_front_ivas few sub-funcs
[x] updates to front_vad
[x] acelp_core_switching
parent 27744513
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -752,6 +752,7 @@ enum
#define GAMMA_EV                            0.92f                  /* weighting factor for core synthesis error weighting */
#define FORMANT_SHARPENING_NOISE_THRESHOLD  21.0f                  /* lp_noise level above which formant sharpening is deactivated */
#define E_MIN_FX                              1       /* QSCALE (Q7)*/
#define E_MIN_IVAS_FX                         1835       /* (Q19) (E12) */
#define STEP_DELTA_FX                         11
#define FORMANT_SHARPENING_NOISE_THRESHOLD_FX   5376     /* 21 in Q8 (21 * 256 = 5376): lp_noise level above which formant sharpening is deactivated - at this level most of 20 dB SNR office noisy speech still uses sharpening */

+142 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include "rom_basop_util.h"
#include "rom_com.h"
#include "prot_fx.h"
#include "prot_fx_enc.h"
#include "ivas_prot_fx.h"

#define DELTA_SHIFT      2
@@ -2471,6 +2472,147 @@ void AnalysisSTFT_fx(
}
#endif // IVAS_FLOAT_FIXED

#ifdef IVAS_FLOAT_FIXED
/*-------------------------------------------------------------------
 * SynthesisSTFT_enc_ivas_fx()
 *
 * STFT synthesis filterbank (encoder side, fixed-point).
 *
 * Runs the inverse real FFT in place on fftBuffer, windows and
 * overlap-adds the result into olapBuffer, writes one frame of
 * hFdCngCom->frameSize samples to timeDomainOutput and, depending on
 * gen_exc, pre-emphasises the synthesised signal and computes the
 * LP residual into hFdCngCom->exc_cng.
 *
 * Side effects on hFdCngCom: fftBuffer_exp is updated with the
 * exponent after the IFFT; exc_cng is written when gen_exc is 1 or 2.
 *-------------------------------------------------------------------*/

void SynthesisSTFT_enc_ivas_fx(
    Word32 *fftBuffer,           /* i/o  : pointer to FFT bins, transformed in place by the IFFT */
    Word16 fftBufferExp,         /* i    : exponent of FFT bins */
    Word16 *timeDomainOutput,    /* o    : pointer to time domain signal (frameSize samples) */
    Word16 *olapBuffer,          /* i/o  : pointer to overlap buffer (fftlen samples) */
    const PWord16 *olapWin,      /* i    : pointer to overlap window */
    Word16 tcx_transition,       /* i    : non-zero: copy the IFFT output directly instead of windowing/overlap-adding the first part */
    HANDLE_FD_CNG_COM hFdCngCom, /* i/o  : pointer to FD_CNG structure containing all buffers and variables */
    Word16 gen_exc,              /* i    : 1: generate exc_cng using the scaling given by *Q_new, 2: generate exc_cng and update *Q_new, otherwise: no excitation */
    Word16 *Q_new,               /* i/o  : Q of generated exc_cng (read when gen_exc == 1, written when gen_exc == 2) */
    const Word16 element_mode,   /* i    : element mode */
    const Word16 nchan_out       /* i    : number of output channels */
)
{
    Word16 i, len, scale, tmp;
    Word16 len2, len3, len4;
    Word16 buf[M + 1 + L_FRAME16k];


    /* Perform IFFT (in place); 'scale' returns the exponent change applied by the transform */
    scale = 0;
    move16();
    BASOP_rfft( fftBuffer, hFdCngCom->fftlen, &scale, 1 );
    fftBufferExp = add( fftBufferExp, scale );
    hFdCngCom->fftBuffer_exp = fftBufferExp;
    move16();

    /* From here on fftBufferExp additionally includes fftlenShift; it is only used for the 32->16 bit scaling below */
    fftBufferExp = add( fftBufferExp, hFdCngCom->fftlenShift );

    /* Perform overlap-add */
    /* Handle overlap in P/S domain for stereo */
    test();
    test();
    IF( ( EQ_16( element_mode, IVAS_CPE_TD ) || EQ_16( element_mode, IVAS_CPE_DFT ) ) && EQ_16( nchan_out, 2 ) )
    {
        /* Save the tail of the previous overlap buffer (incl. M + 1 history samples) for the excitation
         * generation further below, then start from an empty overlap buffer */
        Copy( olapBuffer + 3 * hFdCngCom->frameSize / 4 - ( M + 1 ), buf, hFdCngCom->frameSize + M + 1 );
        set16_fx( olapBuffer, 0, hFdCngCom->fftlen );
    }
    ELSE
    {
        /* Shift the overlap buffer by one frame and clear the freed upper part */
        Copy( olapBuffer + hFdCngCom->frameSize, olapBuffer, hFdCngCom->frameSize );
        set16_fx( olapBuffer + hFdCngCom->frameSize, 0, hFdCngCom->frameSize );
    }
    len2 = shr( hFdCngCom->fftlen, 2 ); /* fftlen / 4 */
    len4 = shr( hFdCngCom->fftlen, 3 ); /* fftlen / 8 */
    len3 = add( len2, len4 );           /* 3 * fftlen / 8 */
    len = add( hFdCngCom->frameSize, len4 );
    IF( tcx_transition )
    {
        /* No windowing/overlap-add of the first part: take the scaled IFFT output as is */
        FOR( i = 0; i < len; i++ )
        {
            olapBuffer[i] = round_fx_sat( L_shl_sat( fftBuffer[i], sub( fftBufferExp, 15 ) ) );
            move16();
        }
    }
    ELSE
    {
        /* Windowed part [len4, 3*len4): add to the content already in the overlap buffer */
        FOR( i = 0; i < len4; i++ )
        {
            olapBuffer[i + 1 * len4] = add_sat( olapBuffer[i + 1 * len4], mult_r( round_fx_sat( L_shl_sat( fftBuffer[i + 1 * len4], sub( fftBufferExp, 15 ) ) ), olapWin[i].v.im ) );
            move16();
            olapBuffer[i + 2 * len4] = add_sat( olapBuffer[i + 2 * len4], mult_r( round_fx_sat( L_shl_sat( fftBuffer[i + 2 * len4], sub( fftBufferExp, 15 ) ) ), olapWin[len4 - 1 - i].v.re ) );
            move16();
        }
        /* Unwindowed part [3*len4, frameSize + len4) */
        FOR( i = len3; i < len; i++ )
        {
            olapBuffer[i] = round_fx_sat( L_shl_sat( fftBuffer[i], sub( fftBufferExp, 15 ) ) );
            move16();
        }
    }

    /* Windowed part [5*len4, 7*len4): overwrites the overlap buffer */
    FOR( i = 0; i < len4; i++ )
    {
        olapBuffer[i + 5 * len4] = mult_r( round_fx_sat( L_shl_sat( fftBuffer[i + 5 * len4], sub( fftBufferExp, 15 ) ) ), olapWin[i].v.re );
        move16();
        olapBuffer[i + 6 * len4] = mult_r( round_fx_sat( L_shl_sat( fftBuffer[i + 6 * len4], sub( fftBufferExp, 15 ) ) ), olapWin[len4 - 1 - i].v.im );
        move16();
    }

    /* Zero the remainder of the overlap buffer */
    len = add( len, len2 );
    FOR( i = len; i < hFdCngCom->fftlen; i++ )
    {
        olapBuffer[i] = 0;
        move16();
    }

    /* Get time-domain signal */
    FOR( i = 0; i < hFdCngCom->frameSize; i++ )
    {
        timeDomainOutput[i] = mult_r( olapBuffer[i + len4], hFdCngCom->fftlenFac );
        move16();
    }
    /* Generate excitation */
    test();
    test();
    IF( ( EQ_16( element_mode, IVAS_CPE_TD ) || EQ_16( element_mode, IVAS_CPE_DFT ) ) && EQ_16( nchan_out, 2 ) )
    {
        /* Add the new overlap part to the saved tail of the previous frame */
        FOR( i = 0; i < hFdCngCom->frameSize / 2; i++ )
        {
            buf[i + ( M + 1 )] = add( buf[i + ( M + 1 )], olapBuffer[i + hFdCngCom->frameSize / 4] );
            move16();
        }

        FOR( i = 0; i < M + 1 + hFdCngCom->frameSize; i++ )
        {
            buf[i] = mult_r( buf[i], hFdCngCom->fftlenFac );
            move16();
        }
    }
    ELSE
    {
        FOR( i = 0; i < M + 1 + hFdCngCom->frameSize; i++ )
        {
            buf[i] = mult_r( olapBuffer[i + len4 - M - 1], hFdCngCom->fftlenFac );
            move16();
        }
    }

    /* tmp is handed by pointer to the pre-emphasis helpers below and must be
     * defined on both paths above; it is taken from the sample preceding buf + 1 */
    tmp = buf[0];
    move16();

    IF( EQ_16( gen_exc, 1 ) )
    {

        E_UTIL_f_preemph2( sub( *Q_new, 1 ), buf + 1, PREEMPH_FAC, M + hFdCngCom->frameSize, &tmp );
        Residu3_fx( hFdCngCom->A_cng, buf + 1 + M, hFdCngCom->exc_cng, hFdCngCom->frameSize, 1 );
    }
    IF( EQ_16( gen_exc, 2 ) )
    {
        *Q_new = E_UTIL_f_preemph3( buf + 1, PREEMPH_FAC, M + hFdCngCom->frameSize, &tmp, 1 );
        move16();
        Residu3_fx( hFdCngCom->A_cng, buf + 1 + M, hFdCngCom->exc_cng, hFdCngCom->frameSize, 1 );
    }
}
#endif

/*-------------------------------------------------------------------
 * SynthesisSTFT()
 *
+23 −23
Original line number Diff line number Diff line
@@ -2644,11 +2644,11 @@ void unclr_classifier_dft(
    CPE_ENC_HANDLE hCPE                                         /* i/o: CPE encoder structure                           */
);
#endif

#ifndef IVAS_FLOAT_FIXED
void unclr_classifier_td(
    CPE_ENC_HANDLE hCPE                                         /* i/o: CPE encoder structure                           */
);

#endif
#ifdef IVAS_FLOAT_FIXED
void xtalk_classifier_dft_fx(
    CPE_ENC_HANDLE hCPE,    /* i/o: CPE encoder structure                   */
@@ -2661,11 +2661,11 @@ void xtalk_classifier_dft(
    const int16_t itd,                                          /* i  : ITD from DFT stereo - used as a feature         */
    const float gcc_phat[]                                      /* i  : GPHAT cross-channel correlation function        */
);

#ifndef IVAS_FLOAT_FIXED
void xtalk_classifier_td(
    CPE_ENC_HANDLE hCPE                                         /* i/o: CPE encoder structure                           */
);

#endif

/*----------------------------------------------------------------------------------*
 * TD Stereo prototypes
@@ -2691,7 +2691,7 @@ void stereo_tdm_prep_dwnmx (
    const float *input1,                                        /* i  : right channel input                     */       
    const int16_t input_frame                                   /* i  : frame lenght                            */           
);

#ifndef IVAS_FLOAT_FIXED
int16_t stereo_tdm_ener_analysis(
    const int16_t ivas_format,                                  /* i  : IVAS format                             */
    CPE_ENC_HANDLE hCPE,                                        /* i  : CPE structure                           */
@@ -2699,7 +2699,7 @@ int16_t stereo_tdm_ener_analysis(
    int16_t *tdm_SM_or_LRTD_Pri,                                /* o  : channel combination scheme flag in TD stereo OR LRTD primary channel */
    int16_t *tdm_ratio_idx_SM                                   /* o  : TDM ratio index for SM mode             */
);

#endif
void stereo_tdm_downmix(
    STEREO_TD_ENC_DATA_HANDLE hStereoTD,                        /* i  : TD stereo IVAS encoder structure        */
    float *Left_in,                                             /* i/o: Left channel -> Primary channel         */
@@ -3661,7 +3661,7 @@ ivas_error front_vad_fx(
    CPE_ENC_HANDLE hCPE,                        /* i/o: CPE encoder structure, nullable                           */
    Encoder_State *st,                          /* i/o: encoder state structure                                   */
    const ENCODER_CONFIG_HANDLE hEncoderConfig, /* i  : configuration structure                                   */
    FRONT_VAD_ENC_HANDLE *hFrontVads,                           /* i/o: front-VAD handles                       */
    FRONT_VAD_ENC_HANDLE *hFrontVads,           /* i/o: FrontVad handles                                          */
    const int16_t MCT_flag,                     /* i  : hMCT handle allocated (1) or not (0)                      */
    const int16_t input_frame,                  /* i  : frame length                                              */
    int16_t vad_flag_dtx[],                     /* o  : HE-SAD flag with additional DTX HO                        */
@@ -3673,8 +3673,8 @@ ivas_error front_vad_fx(
    float band_energies_LR[2 * NB_BANDS],       /* o  : energy in critical bands without minimum noise floor E_MIN*/
    float *PS_out,                              /* o  : energy spectrum                                           */
    float *Bin_E_out,                           /* o  : log-energy spectrum of the current frame                  */
    Word16 Qinp
);
    Word16 Q_inp ,
    Word16 Q_add );
#endif
ivas_error front_vad_spar(
    SPAR_ENC_HANDLE hSpar,                                      /* i/o: SPAR encoder structure                  */
+20 −0
Original line number Diff line number Diff line
@@ -2045,6 +2045,16 @@ void masa_compensate_two_dir_energy_ratio_index_fx(
    const Word16 hodirac_flag   /* i  : flag to indicate HO-DirAC mode        */
);

#ifdef IVAS_FLOAT_FIXED
/* Fixed-point (Word16) prototype of the TD stereo energy analysis.
 * NOTE(review): the meaning of the int16_t return value is not documented in this
 * declaration - confirm against the function definition. */
int16_t stereo_tdm_ener_analysis_fx(
    const Word16 ivas_format,   /* i  : IVAS format                             */
    CPE_ENC_HANDLE hCPE,        /* i  : CPE structure                           */
    const Word16 input_frame,   /* i  : Number of samples                       */
    Word16 *tdm_SM_or_LRTD_Pri, /* o  : channel combination scheme flag in TD stereo OR LRTD primary channel */
    Word16 *tdm_ratio_idx_SM    /* o  : TDM ratio index for SM mode             */
);
#endif

#ifdef IVAS_FLOAT_FIXED
/*! r: projected azimuth index */
Word16 ivas_dirac_project_azimuth_index(
@@ -3043,4 +3053,14 @@ void tdm_ol_pitch_comparison_fx(
    Word16 pitch_fr[CPE_CHANNELS][NB_SUBFR],  /* i/o: fractional pitch values, Q6            */
    Word16 voicing_fr[CPE_CHANNELS][NB_SUBFR] /* i/o: fractional pitch gains, Q15            */
);

#ifdef IVAS_FLOAT_FIXED
/* Fixed-point (_fx) prototype; takes only the CPE encoder handle, which is both read and updated.
 * NOTE(review): presumably the TD stereo cross-talk classifier - confirm against the definition. */
void xtalk_classifier_td_fx(
    CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure                           */
);

/* Fixed-point (_fx) prototype; takes only the CPE encoder handle, which is both read and updated.
 * NOTE(review): presumably the TD stereo UNCLR classifier - confirm against the definition. */
void unclr_classifier_td_fx(
    CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure                           */
);
#endif
#endif
+7 −4
Original line number Diff line number Diff line
@@ -5986,9 +5986,12 @@ void core_switching_pre_enc(
void core_switching_post_enc(
    Encoder_State *st, /* i/o: encoder state structure             */
    const float *old_inp_12k8, /* i  : old input signal @12.8kHz               */
    const float *old_inp_16k,  /* i  : old input signal @16kHz                 */
    const float A[]            /* i  : unquant LP filter coefs.                */
    // const float *old_inp_12k8, /* i  : old input signal @12.8kHz           */
    float *old_inp_12k8, /* i  : old input signal @12.8kHz           */
    // const float *old_inp_16k,  /* i  : old input signal @16kHz             */
    float *old_inp_16k, /* i  : old input signal @16kHz             */
    // const float A[]            /* i  : unquant. LP filter coefs.           */
    float A[] /* i  : unquant. LP filter coefs.           */
);
ivas_error core_switching_post_dec(
Loading