Commit 6c27a4cf authored by Sandesh Venkatesh
Browse files

Merge branch 'enc_cleanup_3' into 'main'

Encoder cleanup changes, ltv crash fixes for original, 10dB and -10dB inputs

See merge request !815
parents cd63a53d 187699e5
Loading
Loading
Loading
Loading
Loading
+16 −10
Original line number Diff line number Diff line
@@ -240,7 +240,8 @@ ivas_error pre_proc_front_ivas_fx(
    Word16 *relE_fx,                                               /* o  : frame relative energy                  Q8 */
    Word16 A_fx[NB_SUBFR16k * ( M + 1 )],                          /* o  : A(z) unquantized for the 4 subframes    */
    Word16 Aw_fx[NB_SUBFR16k * ( M + 1 )],                         /* o  : weighted A(z) unquantized for subframes */
    float epsP[M + 1],                                         /* o  : LP prediction errors                    */
    Word32 epsP_fx[M + 1],                                         /* o  : LP prediction errors                    */
    Word16 *epsP_fx_q,
    Word16 lsp_new_fx[M],                                          /* o  : LSPs at the end of the frame      Q15      */
    Word16 lsp_mid_fx[M],                                          /* o  : LSPs in the middle of the frame   Q15      */
    Word16 *vad_hover_flag,                                    /* o  : VAD hangover flag                       */
@@ -262,11 +263,14 @@ ivas_error pre_proc_front_ivas_fx(
    const Word16 tdm_lsp_new_PCh_fx[M],                            /* i  : unq. LSPs of primary channel       Q15     */
    const Word16 currFlatness_fx,                                  /* i  : flatness parameter                    Q7  */
    const Word16 tdm_ratio_idx,                                /* i  : Current Ratio_L index                   */
    float fr_bands_LR[][2 * NB_BANDS],                         /* i  : energy in frequency bands               */
    Word32 fr_bands_LR_fx[][2 * NB_BANDS],                         /* i  : energy in frequency bands (fr_bands_LR_fx_q) */
    Word16 fr_bands_LR_fx_q,
    const Word16 Etot_LR_fx[],                                     /* i  : total energy Left & Right channel       Q8*/
    float lf_E_LR[][2 * VOIC_BINS],                            /* i  : per bin spectrum energy in lf, LR channels */
    Word32 lf_E_LR_fx[][2 * VOIC_BINS],                            /* i  : per bin spectrum energy in lf, LR channels (lf_E_LR_fx_q) */
    Word16 lf_E_LR_fx_q,
    const Word16 localVAD_HE_SAD_LR[],                         /* i  : HE-SAD flag without hangover, LR channels  */
    float band_energies_LR[2 * NB_BANDS],                      /* o  : energy in critical bands without minimum noise floor E_MIN */
    Word32 band_energies_LR_fx[2 * NB_BANDS],                      /* o  : energy in critical bands without minimum noise floor E_MIN (band_energies_LR_fx_q) */
    Word16 band_energies_LR_fx_q,
    const Word16 flag_16k_smc,                                 /* i  : flag to indicate if the OL SMC is run at 16 kHz */
    const Word16 front_vad_flag,                               /* i  : front-VAD flag to overwrite VAD decision */
    const Word16 force_front_vad,                              /* i  : flag to force VAD decision               */
@@ -289,7 +293,8 @@ ivas_error pre_proc_ivas(
    Word32 *ener_fx,                                            /* o  : residual energy from Levinson-Durbin Q6 */
    Word16 A[NB_SUBFR16k * ( M + 1 )],                           /* i/o: A(z) unquantized for the 4 subframes    */
    Word16 Aw[NB_SUBFR16k * ( M + 1 )],                          /* i/o: weighted A(z) unquantized for subframes */
    float epsP[M + 1],                                          /* i/o: LP prediction errors                    */
    Word32 epsP_fx[M + 1],                                     /* i  : LP prediction errors         epsP_fx_q */
    Word16 *epsP_fx_q,                                          /* i  : Q format of epsP_fx                    */
    Word16 lsp_new[M],                                           /* i/o: LSPs at the end of the frame            */
    Word16 lsp_mid[M],                                           /* i/o: LSPs in the middle of the frame         */
    Word16 *new_inp_resamp16k_fx,                                   /* o  : new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
@@ -529,14 +534,15 @@ ivas_error ivas_core_enc(
    Word32 ener_fx[],                                            /* i  : residual energy from Levinson-Durbin Q6*/
    Word16 A_fx[][NB_SUBFR16k * ( M + 1 )],                      /* i  : A(z) unquantized for the 4 subframes   */
    Word16 Aw_fx[][NB_SUBFR16k * ( M + 1 )],                     /* i  : weighted A(z) unquantized for subframes*/
    float epsP[][M + 1],                                         /* i  : LP prediction errors                   */
    Word32 epsP_fx[][M + 1],                                     /* i  : LP prediction errors         epsP_fx_q */
    Word16 epsP_fx_q[],                                          /* i  : Q format of epsP_fx                    */
    Word16 lsp_new[][M],                                         /* i  : LSPs at the end of the frame       Q15 */
    Word16 lsp_mid[][M],                                         /* i  : LSPs in the middle of the frame    Q15 */
    const int16_t vad_hover_flag[],                              /* i  : VAD hangover flag                      */
    int16_t attack_flag[],                                       /* i  : attack flag (GSC or TC)                */
    Word32 realBuffer_fx[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer                            */
    Word32 imagBuffer_fx[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer                            */
    Word16 q_re_im_buf,
    Word16 *q_re_im_buf,
    float old_wsp[][L_WSP],                                      /* i  : weighted input signal buffer           */
    const int16_t loc_harm[],                                    /* i  : harmonicity flag                       */
    const Word16 cor_map_sum[],                                   /* i  : speech/music clasif. parameter     Q8 */
@@ -2494,10 +2500,10 @@ void stereo_mdct_core_enc(
    float pitch_buf[CPE_CHANNELS][NB_SUBFR16k]                  /* o  : floating pitch for each subframe        */
);
#else
void stereo_mdct_core_enc(
void stereo_mdct_core_enc_fx(
    CPE_ENC_HANDLE hCPE,                                        /* i/o: CPE encoder structure                   */
    float new_samples[CPE_CHANNELS][L_INP],                     /* i  : new samples                             */
    float old_wsp[CPE_CHANNELS][L_WSP],                         /* i  : 12.8kHz weighted speech (for LTP        */
    Word16 new_samples[CPE_CHANNELS][L_INP],                     /* i  : new samples                             */
    Word16 old_wsp[CPE_CHANNELS][L_WSP],                         /* i  : 12.8kHz weighted speech (for LTP        */
    Word16 pitch_buf_fx[CPE_CHANNELS][NB_SUBFR16k]                  /* o  : floating pitch for each subframe        */
);
#endif
+1 −1
Original line number Diff line number Diff line
@@ -4495,7 +4495,7 @@ void EstimateStereoTCXNoiseLevel_fx(
    Encoder_State **sts,                      /* i  : state handle                                    */
    Word32 *q_spectrum[CPE_CHANNELS][NB_DIV], /* i  : quantized MDCT spectrum                         */
    Word16 gain_tcx[][NB_DIV],                /* i  : global gain                                     */
    Word16 gain_tcx_e,                        /* i  : global gain exponent                            */
    Word16 gain_tcx_e[][NB_DIV],              /* i  : global gain exponent                            */
    Word16 L_frame[][NB_DIV],                 /* i  : frame length                                    */
    Word16 noiseFillingBorder[][NB_DIV],      /* i  : noise filling border                            */
    Word16 hm_active[][NB_DIV],               /* i  : flag indicating if the harmonic model is active */
+21 −20
Original line number Diff line number Diff line
@@ -216,7 +216,7 @@ static void find_enr_dft_fx(
    Word32 *Bin_E_fx,         /* o  : Per bin energy      (Q7)                    */
    Word32 *band_ener_fx,     /* o  : per band energy without E_MIN          (Qout) */
    Word16 Q_inp_dmx,
    Word16 Qout )
    Word16 *Qout )
{
    Word16 i, cnt;
    Word32 tmp_fx;
@@ -308,7 +308,7 @@ static void find_enr_dft_fx(
            ngmult = W_shl( ngmult, ngmult_exp );                                    // Q31 + Q31 - guarded_bits + ngmult_exp
            BinE_fx[bin_cnt] = Mpy_32_32( W_extract_h( ngmult ), BinE_fx[bin_cnt] ); //(Q31 + Q31 - guarded_bits + ngmult_exp) - Q32 + (2 * Q_inp_dmx + te_exp - 32) - 31
            move32();
            BinE_fx[bin_cnt] = L_shl( BinE_fx[bin_cnt], sub( Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) );
            BinE_fx[bin_cnt] = L_shl( BinE_fx[bin_cnt], sub( *Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) );
            move32();
            band_fx[i] = L_add( BinE_fx[bin_cnt], band_fx[i] );
            move32();
@@ -324,9 +324,9 @@ static void find_enr_dft_fx(
        move32();
        band_ener_fx[i] = band_fx[i]; /* per band energy without E_MIN   */
        move32();
        if ( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) ) ) )
        IF( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) ) ) )
        {
            band_fx[i] = L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) );
            band_fx[i] = L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) );
            move32();
        }
    }
@@ -370,7 +370,7 @@ static void find_enr_dft_fx(
            ngmult = W_shl( ngmult, ngmult_exp );                                    // Q31 + Q31 - guarded_bits + ngmult_exp
            BinE_fx[bin_cnt] = Mpy_32_32( W_extract_h( ngmult ), BinE_fx[bin_cnt] ); //(Q31 + Q31 - guarded_bits + ngmult_exp) - Q32 + (2 * Q_inp_dmx + te_exp - 32) - 31
            move32();
            BinE_fx[bin_cnt] = L_shl( BinE_fx[bin_cnt], sub( Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) );
            BinE_fx[bin_cnt] = L_shl( BinE_fx[bin_cnt], sub( *Qout, sub( sub( add( add( shl( Q_inp_dmx, 1 ), te_exp ), ngmult_exp ), 34 ), guarded_bits ) ) );
            move32();
            band_fx[i] = L_add( BinE_fx[bin_cnt], band_fx[i] );
            move32();
@@ -385,9 +385,9 @@ static void find_enr_dft_fx(
        move32();
        band_ener_fx[i] = band_fx[i];
        move32();
        if ( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) ) ) )
        IF( LT_32( band_fx[i], L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) ) ) )
        {
            band_fx[i] = L_shl( E_MIN_Q11_FX, sub( Qout, Q11 ) );
            band_fx[i] = L_shl( E_MIN_Q11_FX, sub( *Qout, Q11 ) );
            move32();
        }
    }
@@ -397,7 +397,7 @@ static void find_enr_dft_fx(
    assert( bin_cnt == ( STEREO_DFT_N_12k8_ENC / 2 - 1 ) );
    BinE_fx[STEREO_DFT_N_12k8_ENC / 2 - 1] = BinE_fx[STEREO_DFT_N_12k8_ENC / 2 - 2];
    move32();
    L_lerp_fx( BinE_fx, Bin_E_fx, L_FFT / 2, STEREO_DFT_N_12k8_ENC / 2, &Qout );
    L_lerp_fx( BinE_fx, Bin_E_fx, L_FFT / 2, STEREO_DFT_N_12k8_ENC / 2, Qout );
    MVR2R_WORD32( Bin_E_fx, ptE_fx, VOIC_BINS ); // Qout

    /* find the total log energy */
@@ -518,10 +518,8 @@ void ivas_analy_sp_fx(
    }
    ELSE
    {
        Word16 Q_inp_dmx = Q_factor_arrL( hCPE->hStereoDft->DFT[0], STEREO_DFT_N_MAX_ENC );
        floatToFixed_arrL( hCPE->hStereoDft->DFT[0], hCPE->hStereoDft->DFT_fx[0], Q_inp_dmx, STEREO_DFT_N_MAX_ENC );
        Word16 Qout = add( Q_new, QSCALE - 2 );
        find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, Q_inp_dmx, Qout );
        find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, sub( Q31, hCPE->hStereoDft->DFT_fx_e[0] ), &Qout );
        MVR2R_WORD32( lf_E, lf_E + VOIC_BINS, VOIC_BINS );
        MVR2R_WORD32( Bin_E, Bin_E + ( L_FFT / 2 ), L_FFT / 2 );
        MVR2R_WORD32( band_energies, band_energies + NB_BANDS, NB_BANDS );
@@ -583,10 +581,10 @@ void ivas_analy_sp_fx_front(
    Word16 Scale_fac[2],       /* o  : FFT scales factors (2 values by frame) Q0                 */
    Word32 *Bin_E,             /* o  : per-bin energy spectrum                  Q7                */
    Word32 *Bin_E_old,         /* o  : per-bin energy spectrum of the previous frame      Q7      */
    Word32 *PS,                /* o  : per-bin energy spectrum                   Q_new + QSCALE               */
    Word32 *PS,                /* o  : per-bin energy spectrum                   Q_new + QSCALE - 2               */
    Word16 *EspecdB,           /* o  : per-bin log energy spectrum (with f=0) Q7                */
    Word32 *band_energies,     /* o  : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (Q_new + QSCALE + 2)*/
    Word16 *fft_buff           /* o  : FFT coefficients                                   (Q_new + QSCALE + 2)      */
    Word32 *band_energies,     /* o  : energy in critical frequency bands without minimum noise floor MODE2_E_MIN (Q_new + QSCALE)*/
    Word16 *fft_buff           /* o  : FFT coefficients                                   (Q_new + Scale_fac[i_subfr])      */
)
{
    Word16 *pt;
@@ -651,7 +649,9 @@ void ivas_analy_sp_fx_front(
    }
    ELSE
    {
        find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, sub( Q31, hCPE->hStereoDft->DFT_fx_e[0] ), add( Q_new, QSCALE - 2 ) );
        Word16 Qout = add( Q_new, QSCALE - 2 );
        find_enr_dft_fx( hCPE, input_Fs, hCPE->hStereoDft->DFT_fx[0], pt_bands, lf_E, &LEtot, min_band, max_band, Bin_E, band_energies, sub( Q31, hCPE->hStereoDft->DFT_fx_e[0] ), &Qout );
        scale_sig32( lf_E, VOIC_BINS, sub( add( Q_new, QSCALE - 2 ), Qout ) );
        MVR2R_WORD32( lf_E, lf_E + VOIC_BINS, VOIC_BINS );
        MVR2R_WORD32( Bin_E, Bin_E + ( L_FFT / 2 ), L_FFT / 2 );
        MVR2R_WORD32( band_energies, band_energies + NB_BANDS, NB_BANDS );
@@ -668,7 +668,8 @@ void ivas_analy_sp_fx_front(
    }
    ELSE
    {
        temp32_log = BASOP_Util_Log10( L_shr( LEtot, 1 ), sub( Q31, add( Q_new, QSCALE - Q2 ) ) ); // Q25
        /* log10( LEtot / 2 ) = log10( LEtot ) - log10( 2 ) */
        temp32_log = L_sub( BASOP_Util_Log10( LEtot, sub( Q31, add( Q_new, QSCALE - Q2 ) ) ), LOG10_2_Q31 >> Q6 ); // Q25
        temp32_log = Mpy_32_32( temp32_log, 1342177280 /* 10.f in Q27 */ );                                        // (Q25, Q27) -> Q21
        *Etot = extract_l( L_shr( temp32_log, Q21 - Q8 ) );                                                        // Q8
        move16();
@@ -1050,7 +1051,7 @@ static void ivas_find_enr1(
        band[i] = L_shl( Ltmp, Q2 );                                                          // Q_new + QSCALE
        move32();

        band_energies[i] = L_shl( band[i], Q2 ); /* per band energy without E_MIN   */ // Q_new + QSCALE + 2
        band_energies[i] = band[i]; /* per band energy without E_MIN   */ // Q_new + QSCALE
        move32();

        if ( LT_32( band[i], e_min ) ) // Q_new + QSCALE
@@ -1098,7 +1099,7 @@ static void ivas_find_enr1(
            band[i] = L_shl( Ltmp, Q2 );                                                          // Q_new + QSCALE
            move32();

            band_energies[i] = L_shl( band[i], Q2 ); /* per band energy without E_MIN   */ // Q_new + QSCALE + 2
            band_energies[i] = band[i]; /* per band energy without E_MIN   */ // Q_new + QSCALE
            move32();

            if ( LT_32( band[i], e_min ) ) // Q_new + QSCALE
+2 −2
Original line number Diff line number Diff line
@@ -1714,7 +1714,7 @@ void EstimateStereoTCXNoiseLevel_fx(
    Encoder_State **sts,                      /* i  : state handle                                    */
    Word32 *q_spectrum[CPE_CHANNELS][NB_DIV], /* i  : quantized MDCT spectrum                         */
    Word16 gain_tcx[][NB_DIV],                /* i  : global gain                                     */
    Word16 gain_tcx_e,                        /* i  : global gain exponent                            */
    Word16 gain_tcx_e[][NB_DIV],              /* i  : global gain exponent                            */
    Word16 L_frame[][NB_DIV],                 /* i  : frame length                                    */
    Word16 noiseFillingBorder[][NB_DIV],      /* i  : noise filling border                            */
    Word16 hm_active[][NB_DIV],               /* i  : flag indicating if the harmonic model is active */
@@ -1821,7 +1821,7 @@ void EstimateStereoTCXNoiseLevel_fx(
                    hTcxEnc->spectrum_e[n] = add( hTcxEnc->spectrum_e[n], Q1 );
                    move16();
                }
                tcx_noise_factor_ivas_fx( hTcxEnc->spectrum_fx[n], hTcxEnc->spectrum_e[n], combined_q_spectrum, iStart, maxNfCalcBw, noiseTransWidth, L_frame[ch][n], gain_tcx[ch][n], gain_tcx_e, hTcxEnc->noiseTiltFactor, &fac_ns[ch][n], fac_ns_q, st->element_mode );
                tcx_noise_factor_ivas_fx( hTcxEnc->spectrum_fx[n], hTcxEnc->spectrum_e[n], combined_q_spectrum, iStart, maxNfCalcBw, noiseTransWidth, L_frame[ch][n], gain_tcx[ch][n], gain_tcx_e[ch][n], hTcxEnc->noiseTiltFactor, &fac_ns[ch][n], fac_ns_q, st->element_mode );

                /* hysteresis for very tonal passages (more stationary noise filling level) */
                IF( EQ_16( *fac_ns_q, 1 ) )
+9 −6
Original line number Diff line number Diff line
@@ -826,7 +826,7 @@ static void init_sig_buffers(
        set_zero( st->buf_wspeech_enc_flt, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k + 320 );
        if ( hTcxEnc != NULL )
        {
            set_zero( hTcxEnc->buf_speech_ltp_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k );
            // set_zero( hTcxEnc->buf_speech_ltp_flt, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k );
        }
    }
    else if ( st->L_frame != L_frame_old && !( ( total_brate >= ACELP_16k40 && total_brate <= ACELP_24k40 ) &&
@@ -836,7 +836,9 @@ static void init_sig_buffers(

        if ( ( st->last_core != TCX_20_CORE ) && ( st->last_core != TCX_10_CORE ) )
        {
            mvr2r( st->buf_speech_enc_flt, hTcxEnc->buf_speech_ltp_flt, st->L_frame );
#ifdef IVAS_FLOAT_FIXED
            floatToFixed_arr( st->buf_speech_enc_flt, st->buf_speech_enc, 0, st->L_frame );
#endif
        }

        mvr2r( st->old_wsp, st->buf_wspeech_enc_flt + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM );
@@ -875,7 +877,7 @@ static void init_sig_buffers(

    if ( hTcxEnc != NULL )
    {
        hTcxEnc->new_speech_ltp_flt = hTcxEnc->buf_speech_ltp_flt + st->encoderPastSamples_enc + st->encoderLookahead_enc;
        // hTcxEnc->new_speech_ltp_flt = hTcxEnc->buf_speech_ltp_flt + st->encoderPastSamples_enc + st->encoderLookahead_enc;
    }

    if ( st->hTcxEnc != NULL )
@@ -889,7 +891,7 @@ static void init_sig_buffers(

    if ( hTcxEnc != NULL )
    {
        hTcxEnc->speech_ltp_flt = hTcxEnc->buf_speech_ltp_flt + st->encoderPastSamples_enc;
        // hTcxEnc->speech_ltp_flt = hTcxEnc->buf_speech_ltp_flt + st->encoderPastSamples_enc;
    }

    if ( st->element_mode > EVS_MONO )
@@ -900,12 +902,13 @@ static void init_sig_buffers(
    {
        st->wspeech_enc_flt = st->buf_wspeech_enc_flt + st->L_frame + L_subfr;
    }

#ifndef IVAS_FLOAT_FIXED
    if ( st->ini_frame == 0 || st->L_frame != L_frame_old || st->last_codec_mode == MODE1 )
    {
        set_zero( st->buf_synth_flt, OLD_SYNTH_SIZE_ENC + L_FRAME32k );
    }
    st->synth_flt = st->buf_synth_flt + st->L_frame + L_subfr;
#endif

    return;
}
@@ -1299,7 +1302,7 @@ static void init_acelp(
            st->hTcxCfg->tcx_curr_overlap_mode = ALDO_WINDOW;

            /*ALDO overlap windowed past: also used in MODE1 but for other MDCT-FB*/
            set_f( st->hTcxEnc->old_out, 0, st->L_frame );
            // set_f( st->hTcxEnc->old_out, 0, st->L_frame );
        }
        else
        {
Loading