Commit bd41ba78 authored by Sandesh Venkatesh
Browse files

Merge branch '3gpp_issue_1499_fix' into 'main'

[allow regression] Fix for 3GPP issue 1499: Spike in BASOP encoded signal of MASA selection material

See merge request !1452
parents b3178384 f2d5e62e
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -438,7 +438,7 @@ void ivas_analy_sp_fx(
    Word16 *q_fr_bands,        /* o  : energy in critical frequency bands              Q0                 */
    Word32 *lf_E,              /* o  : per bin E for first...                          q_lf_E             */
    Word16 *q_lf_E,            /* o  : per bin E for first...                          Q0                 */
    Word16 *Etot,              /* o  : total input energy                              Q8                 */
    Word32 *Etot,              /* o  : total input energy                              Q24                 */
    const Word16 min_band,     /* i  : minimum critical band                           Q0                 */
    const Word16 max_band,     /* i  : maximum critical band                           Q0                 */
    Word32 *Bin_E,             /* o  : per-bin energy spectrum                         q_Bin_E            */
@@ -557,7 +557,7 @@ void ivas_analy_sp_fx(

        /* Average total log energy over both half-frames */
        /* *Etot = 10.0f * (float)log10(0.5f * *Etot); */
        *Etot = -12800 /* 10.f * logf(0.00001f) in Q8 */;
        *Etot = -838860800 /* 10.f * log10f(0.00001f) in Q24 : This is when LEtot is 0*/;
        move16();
        IF( LEtot != 0 )
        {
@@ -566,7 +566,7 @@ void ivas_analy_sp_fx(
            LEtot = W_shl( LEtot, exp );                                                                                        // q_fr_bands+2+exp
            Ltmp = BASOP_Util_Log10( W_extract_h( LEtot ), sub( 61, add( *q_fr_bands, exp ) ) /* 31-(q_fr_bands+2+exp-32) */ ); // Q25
            Ltmp = Mpy_32_32( Ltmp, 1342177280 /* 10.f in Q27 */ );                                                             // (Q25, Q27) -> Q21
            *Etot = extract_h( L_shl( Ltmp, Q24 - Q21 ) );                                                                      // Q8
            *Etot = L_shl( Ltmp, Q24 - Q21 );                                                                                   // Q24
            move16();
        }
    }
@@ -599,7 +599,7 @@ void ivas_analy_sp_fx(
        }

        /* Average total log energy over both half-frames */
        *Etot = -12800 /* 10.f * logf(0.00001f) in Q8 */;
        *Etot = -838860800 /* 10.f * log10f(0.00001f) in Q24 : This is when LEtot is 0*/;
        move16();
        IF( LEtot != 0 )
        {
@@ -607,7 +607,7 @@ void ivas_analy_sp_fx(
            LEtot = W_shl( LEtot, exp );                                                                                        // q_fr_bands+exp
            Ltmp = BASOP_Util_Log10( W_extract_h( LEtot ), sub( 62, add( *q_fr_bands, exp ) ) /* 31-(q_fr_bands+1+exp-32) */ ); // Q25
            Ltmp = Mpy_32_32( Ltmp, 1342177280 /* 10.f in Q27 */ );                                                             // (Q25, Q27) -> Q21
            *Etot = extract_h( L_shl( Ltmp, Q24 - Q21 ) );                                                                      // Q8
            *Etot = L_shl( Ltmp, Q24 - Q21 );                                                                                   // Q24
            move16();
        }
    }
+1 −1
Original line number Diff line number Diff line
@@ -729,7 +729,7 @@ Word16 find_uv_ivas_fx( /* o : coding typ
     * Total frame energy difference (dE3)
     *-----------------------------------------------------------------*/

    dE3 = sub( Etot, hNoiseEst->Etot_last_fx ); /*Q8*/
    dE3 = sub( Etot, extract_h( hNoiseEst->Etot_last_32fx ) ); /*Q8*/

    /*-----------------------------------------------------------------*
     * Energy decrease after spike (dE2)
+16 −16
Original line number Diff line number Diff line
@@ -205,7 +205,7 @@ ivas_error pre_proc_front_ivas_fx(
    Word16 S_map_fx[L_FFT / 2];
    Word16 cor_map_sum_LR_fx[CPE_CHANNELS];    /* speech/music clasif. parameter               */
    Word16 S_map_LR_fx[L_FFT / 2];             /* short-term correlation map                   */
    Word16 Etot_fx;                            /* total energy                           Q8    */
    Word32 Etot_fx;                            /* total energy                           Q24   */
    Word32 tmpN_fx[NB_BANDS];                  /* Temporary noise update                       */
    Word32 tmpE_fx[NB_BANDS];                  /* Temporary averaged energy of 2 sf.           */
    Word32 tmpN_LR_fx[CPE_CHANNELS][NB_BANDS]; /* Temporary noise update                       */
@@ -739,7 +739,7 @@ ivas_error pre_proc_front_ivas_fx(

    IF( hStereoClassif != NULL )
    {
        IF( GT_32( sub( st->lp_speech_fx, Etot_fx ), 25 << Q8 ) ) /*Q8*/
        IF( GT_32( sub( st->lp_speech_fx, extract_h( Etot_fx ) ), 25 << Q8 ) ) /*Q8*/
        {
            hStereoClassif->silence_flag = 2;
            move16();
@@ -759,11 +759,11 @@ ivas_error pre_proc_front_ivas_fx(

    IF( hCPE != NULL )
    {
        noise_est_pre_32fx( L_deposit_h( Etot_fx ), st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE->last_element_mode );
        noise_est_pre_32fx( ( Etot_fx ), st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE->last_element_mode );
    }
    ELSE
    {
        noise_est_pre_32fx( L_deposit_h( Etot_fx ), st->ini_frame, st->hNoiseEst, st->idchan, element_mode, element_mode );
        noise_est_pre_32fx( ( Etot_fx ), st->ini_frame, st->hNoiseEst, st->idchan, element_mode, element_mode );
    }

    test();
@@ -913,18 +913,18 @@ ivas_error pre_proc_front_ivas_fx(
     *----------------------------------------------------------------*/

    noise_est_down_ivas_fx( fr_bands_fx, fr_bands_fx_q, st->hNoiseEst->bckr_fx, &st->hNoiseEst->q_bckr, tmpN_fx, &q_tmpN, tmpE_fx, &q_tmpE, st->min_band, st->max_band,
                            &st->hNoiseEst->totalNoise_fx, Etot_fx, &st->hNoiseEst->Etot_last_fx, &st->hNoiseEst->Etot_v_h2_fx );
                            &st->hNoiseEst->totalNoise_fx, Etot_fx, &st->hNoiseEst->Etot_last_32fx, &st->hNoiseEst->Etot_v_h2_fx );

    test();
    IF( lr_vad_enabled && st->idchan == 0 )
    {
        noise_est_down_ivas_fx( fr_bands_LR_fx[0], fr_bands_LR_fx_q[0], hCPE->hFrontVad[0]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[0]->hNoiseEst->q_bckr, tmpN_LR_fx[0], &q_tmpN_LR[0], tmpE_LR_fx[0], &q_tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise_fx, Etot_LR_fx[0], &hCPE->hFrontVad[0]->hNoiseEst->Etot_last_fx, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx );
        noise_est_down_ivas_fx( fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], hCPE->hFrontVad[1]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[1]->hNoiseEst->q_bckr, tmpN_LR_fx[1], &q_tmpN_LR[1], tmpE_LR_fx[1], &q_tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise_fx, Etot_LR_fx[1], &hCPE->hFrontVad[1]->hNoiseEst->Etot_last_fx, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx );
        noise_est_down_ivas_fx( fr_bands_LR_fx[0], fr_bands_LR_fx_q[0], hCPE->hFrontVad[0]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[0]->hNoiseEst->q_bckr, tmpN_LR_fx[0], &q_tmpN_LR[0], tmpE_LR_fx[0], &q_tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise_fx, L_deposit_h( Etot_LR_fx[0] ), &hCPE->hFrontVad[0]->hNoiseEst->Etot_last_32fx, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx );
        noise_est_down_ivas_fx( fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], hCPE->hFrontVad[1]->hNoiseEst->bckr_fx, &hCPE->hFrontVad[1]->hNoiseEst->q_bckr, tmpN_LR_fx[1], &q_tmpN_LR[1], tmpE_LR_fx[1], &q_tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise_fx, L_deposit_h( Etot_LR_fx[1] ), &hCPE->hFrontVad[1]->hNoiseEst->Etot_last_32fx, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx );

        corr_shiftL_fx = correlation_shift_fx( hCPE->hFrontVad[0]->hNoiseEst->totalNoise_fx ); // Q15
        corr_shiftR_fx = correlation_shift_fx( hCPE->hFrontVad[1]->hNoiseEst->totalNoise_fx ); // Q15
    }
    *relE_fx = sub( Etot_fx, st->lp_speech_fx ); // Q8
    *relE_fx = sub( extract_h( Etot_fx ), st->lp_speech_fx ); // Q8
    move16();

    corr_shift_fx = correlation_shift_fx( st->hNoiseEst->totalNoise_fx ); /* Q15 */
@@ -1307,8 +1307,8 @@ ivas_error pre_proc_front_ivas_fx(
    move16();
    move16();

    noise_est_ivas_fx( st, old_pitch1, tmpN_fx, epsP_fx, Etot_fx, *relE_fx, corr_shift_fx, tmpE_fx, q_tmpE, fr_bands_fx, fr_bands_fx_q, cor_map_sum_fx,
                       &ncharX_fx, &sp_div_fx, &q_sp_div, &non_staX_fx, loc_harm, lf_E_fx, q_lf_E_fx, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp_fx,
    noise_est_ivas_fx( st, old_pitch1, tmpN_fx, epsP_fx, extract_h( Etot_fx ), *relE_fx, corr_shift_fx, tmpE_fx, q_tmpE, fr_bands_fx, fr_bands_fx_q, cor_map_sum_fx,
                       &ncharX_fx, &sp_div_fx, &q_sp_div, &non_staX_fx, loc_harm, lf_E_fx, q_lf_E_fx, &st->hNoiseEst->harm_cor_cnt, extract_h( st->hNoiseEst->Etot_l_lp_32fx ),
                       st->hNoiseEst->Etot_v_h2_fx, &st->hNoiseEst->bg_cnt, st->lgBin_E_fx, &dummy_fx, S_map_fx,
                       hStereoClassif, NULL, st->ini_frame );

@@ -1352,14 +1352,14 @@ ivas_error pre_proc_front_ivas_fx(
        noise_est_ivas_fx( st, old_pitch1, tmpN_LR_fx[0], epsP_fx, Etot_LR_fx[0], sub( Etot_LR_fx[0], hCPE->hFrontVad[0]->lp_speech_fx ), corr_shiftL_fx,
                           tmpE_LR_fx[0], q_tmpE_LR[0], fr_bands_LR_fx[0], fr_bands_LR_fx_q[0], &cor_map_sum_LR_fx[0], &ncharX_LR_fx, &sp_div_LR_fx, &q_sp_div_LR,
                           &non_staX_LR_fx, loc_harmLR_fx, lf_E_LR_fx[0], lf_E_LR_fx_q, &hCPE->hFrontVad[0]->hNoiseEst->harm_cor_cnt,
                           hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp_fx, hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[0]->hNoiseEst->bg_cnt,
                           extract_h( hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp_32fx ), hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[0]->hNoiseEst->bg_cnt,
                           st->lgBin_E_fx, &dummy_fx, S_map_LR_fx, NULL, hCPE->hFrontVad[0], hCPE->hFrontVad[0]->ini_frame );

        /* Note: the index [0] in the last argument is intended, the ini_frame counter is only maintained in the zero-th channel's VAD handle */
        noise_est_ivas_fx( st, old_pitch1, tmpN_LR_fx[1], epsP_fx, Etot_LR_fx[1], sub( Etot_LR_fx[1], hCPE->hFrontVad[1]->lp_speech_fx ), corr_shiftR_fx,
                           tmpE_LR_fx[1], q_tmpE_LR[1], fr_bands_LR_fx[1], fr_bands_LR_fx_q[1], &cor_map_sum_LR_fx[1], &ncharX_LR_fx, &sp_div_LR_fx, &q_sp_div_LR,
                           &non_staX_LR_fx, loc_harmLR_fx, lf_E_LR_fx[1], lf_E_LR_fx_q, &hCPE->hFrontVad[1]->hNoiseEst->harm_cor_cnt,
                           hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp_fx, hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[1]->hNoiseEst->bg_cnt,
                           extract_h( hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp_32fx ), hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2_fx, &hCPE->hFrontVad[1]->hNoiseEst->bg_cnt,
                           st->lgBin_E_fx, &dummy_fx, S_map_LR_fx, NULL, hCPE->hFrontVad[1], hCPE->hFrontVad[0]->ini_frame );
    }

@@ -1387,7 +1387,7 @@ ivas_error pre_proc_front_ivas_fx(
    find_tilt_ivas_fx( fr_bands_fx, fr_bands_fx_q, st->hNoiseEst->bckr_fx, st->hNoiseEst->q_bckr, ee_fx, st->pitch, st->voicing_fx, lf_E_fx, q_lf_E_fx,
                       corr_shift_fx, st->input_bwidth, st->max_band, hp_E_fx, MODE1, &( st->bckr_tilt_lt ), st->Opt_SC_VBR );

    st->coder_type = find_uv_ivas_fx( st, pitch_fr_fx, voicing_fr_fx, inp_12k8_fx, ee_fx, &dE1X_fx, corr_shift_fx, *relE_fx, Etot_fx, hp_E_fx,
    st->coder_type = find_uv_ivas_fx( st, pitch_fr_fx, voicing_fr_fx, inp_12k8_fx, ee_fx, &dE1X_fx, corr_shift_fx, *relE_fx, extract_h( Etot_fx ), hp_E_fx,
                                      &flag_spitch, last_core_orig, hStereoClassif, *Q_new /*q_inp_12k8*/, fr_bands_fx_q ); // Q0
    Copy_Scale_sig_16_32_no_sat( st->lgBin_E_fx, st->Bin_E_fx, L_FFT / 2, sub( st->q_Bin_E, Q7 ) );

@@ -1461,7 +1461,7 @@ ivas_error pre_proc_front_ivas_fx(
    scale_sig32( PS_fx, 128, shift );
    Qfact_PS = add( Qfact_PS, shift );

    smc_dec = ivas_smc_gmm_fx( st, hStereoClassif, localVAD_HE_SAD, Etot_fx, lsp_new_fx, *cor_map_sum_fx /*Q8*/, epsP_fx, PS_fx, non_staX_fx, *relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, *epsP_fx_q, hSpMusClas->past_PS_Q ); /* Q0 */
    smc_dec = ivas_smc_gmm_fx( st, hStereoClassif, localVAD_HE_SAD, extract_h( Etot_fx ), lsp_new_fx, *cor_map_sum_fx /*Q8*/, epsP_fx, PS_fx, non_staX_fx, *relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, *epsP_fx_q, hSpMusClas->past_PS_Q ); /* Q0 */

#ifdef DEBUG_FORCE_DIR
    if ( st->force_dir[0] != '\0' )
@@ -1495,7 +1495,7 @@ ivas_error pre_proc_front_ivas_fx(
     * Update of old per-band energy spectrum
     *----------------------------------------------------------------*/

    ivas_long_enr_fx( st, Etot_fx, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL );
    ivas_long_enr_fx( st, extract_h( Etot_fx ), localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL );

    Copy32( fr_bands_fx + NB_BANDS, st->hNoiseEst->enrO_fx, NB_BANDS ); /* fr_bands_fx_q */
    st->hNoiseEst->q_enrO = fr_bands_fx_q;
@@ -1676,7 +1676,7 @@ ivas_error pre_proc_front_ivas_fx(
        st->q_Bin_E = add( *Q_new, Q_SCALE - 2 );
        move16();
        /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */
        ivas_smc_mode_selection_fx( st, element_brate, smc_dec, *relE_fx, Etot_fx, attack_flag, inp_12k8_fx, *Q_new, S_map_fx, flag_spitch );
        ivas_smc_mode_selection_fx( st, element_brate, smc_dec, *relE_fx, extract_h( Etot_fx ), attack_flag, inp_12k8_fx, *Q_new, S_map_fx, flag_spitch );
    }

#ifdef DEBUG_FORCE_DIR
+2 −2
Original line number Diff line number Diff line
@@ -735,8 +735,8 @@ ivas_error ivas_cpe_enc_fx(
            move16();
        }

        Etot_last_fx[0] = sts[0]->hNoiseEst->Etot_last_fx; /* Q8 */
        Etot_last_fx[1] = sts[1]->hNoiseEst->Etot_last_fx; /* Q8 */
        Etot_last_fx[0] = extract_h( sts[0]->hNoiseEst->Etot_last_32fx ); /* Q8 */
        Etot_last_fx[1] = extract_h( sts[1]->hNoiseEst->Etot_last_32fx ); /* Q8 */
        move16();
        move16();
    }
+7 −5
Original line number Diff line number Diff line
@@ -90,6 +90,7 @@ ivas_error front_vad_fx(
    ivas_error error;
    Word16 Q_new;
    Word16 Qband, mem_decim_size;
    Word32 Etot_LR_32fx;
    error = IVAS_ERR_OK;
    push_wmops( "front_vad" );
    move16();
@@ -248,7 +249,7 @@ ivas_error front_vad_fx(
            move16();

            ivas_analy_sp_fx( IVAS_CPE_TD, hCPE, sts[0]->input_Fs, hFrontVad->buffer_12k8_fx + L_FFT / 2 - 3 * ( L_SUBFR / 2 ), Q_new, fr_bands_fx[n],
                              &q_fr_bands[n], lf_E_fx[n], &q_lf_E[n], &Etot_LR_fx[n], sts[0]->min_band, sts[0]->max_band, Bin_E_fx, &q_Bin_E, Bin_E_old_fx,
                              &q_fr_bands[n], lf_E_fx[n], &q_lf_E[n], &Etot_LR_32fx, sts[0]->min_band, sts[0]->max_band, Bin_E_fx, &q_Bin_E, Bin_E_old_fx,
                              &q_Bin_E_old, PS_fx, q_PS_out, lgBin_E_fx, band_energies_fx, &q_band_energies, fft_buffLR_fx, &q_fft_buffLR );
            if ( n == 0 )
            {
@@ -275,9 +276,10 @@ ivas_error front_vad_fx(

            /* add up energies for later calculating average of channel energies */

            Word32 Etot_fx = L_deposit_h( Etot_LR_fx[n] ); /* Q24 */
            noise_est_pre_32fx( Etot_LR_32fx, hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 );

            noise_est_pre_32fx( Etot_fx, hFrontVads[0]->ini_frame, hFrontVad->hNoiseEst, 0, 0, 0 );
            Etot_LR_fx[n] = extract_h( Etot_LR_32fx );
            move16();

            /* wb_vad */
            Word16 scale = s_min( q_fr_bands[n], add( hFrontVads[n]->hNoiseEst->q_enrO, L_norm_arr( hFrontVads[n]->hNoiseEst->enrO_fx, NB_BANDS ) ) );
@@ -611,7 +613,7 @@ ivas_error front_vad_spar_fx(

        Word16 q_tmpN, q_tmpE;
        noise_est_down_ivas_fx( fr_bands_fx[0], q_fr_bands[0], hFrontVad->hNoiseEst->bckr_fx, &hFrontVad->hNoiseEst->q_bckr, tmpN_fx, &q_tmpN, tmpE_fx, &q_tmpE, st->min_band, st->max_band,
                                &hFrontVad->hNoiseEst->totalNoise_fx, Etot_fx[0], &hFrontVad->hNoiseEst->Etot_last_fx, &hFrontVad->hNoiseEst->Etot_v_h2_fx );
                                &hFrontVad->hNoiseEst->totalNoise_fx, Etot_fx[0], &hFrontVad->hNoiseEst->Etot_last_32fx, &hFrontVad->hNoiseEst->Etot_v_h2_fx );

        corr_shift_fx = correlation_shift_fx( hFrontVad->hNoiseEst->totalNoise_fx ); /* Q15 */

@@ -753,7 +755,7 @@ ivas_error front_vad_spar_fx(

        noise_est_ivas_fx( st, old_pitch, tmpN_fx, epsP_fx, Etot_fx[0], sub( Etot_fx[0], hFrontVad->lp_speech_fx ), corr_shift_fx, tmpE_fx,
                           hFrontVad->hNoiseEst->ave_enr_q, fr_bands_fx[0], q_fr_bands[0], &cor_map_sum_fx, NULL, &sp_div_fx, &Q_sp_div, &non_staX_fx, &loc_harm,
                           lf_E_fx[0], q_lf_E[0], &hFrontVad->hNoiseEst->harm_cor_cnt, hFrontVad->hNoiseEst->Etot_l_lp_fx, hFrontVad->hNoiseEst->Etot_v_h2_fx,
                           lf_E_fx[0], q_lf_E[0], &hFrontVad->hNoiseEst->harm_cor_cnt, extract_h( hFrontVad->hNoiseEst->Etot_l_lp_32fx ), hFrontVad->hNoiseEst->Etot_v_h2_fx,
                           &hFrontVad->hNoiseEst->bg_cnt, st->lgBin_E_fx, &sp_floor, S_map_fx, NULL, hFrontVad, hFrontVad->ini_frame );

        MVR2R_WORD16( st->pitch, st->pitch, 3 );
Loading