From eac9a5b7cfc3ca1fa3c8ad92a69839a1adc79f89 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Thu, 3 Oct 2024 12:20:59 +0530 Subject: [PATCH 1/2] MASA enc functions conversion/integration, CPE and ISM float code cleanup [x] ivas_masa_encode: 10 functions converted to fixed point [x] computeReferencePower_omasa converted [x] ism_enc cleanup [x] CPE ENC Cleanup --- lib_com/ivas_qspherical_com.c | 19 + lib_enc/ivas_core_enc.c | 6 - lib_enc/ivas_core_pre_proc_front.c | 30 +- lib_enc/ivas_cpe_enc.c | 83 +-- lib_enc/ivas_front_vad.c | 17 +- lib_enc/ivas_ism_enc.c | 501 +++++++------------ lib_enc/ivas_omasa_enc.c | 92 +++- lib_enc/ivas_qmetadata_enc.c | 621 ++++++++++++++++++++--- lib_enc/ivas_stat_enc.h | 43 +- lib_enc/ivas_stereo_classifier.c | 40 +- lib_enc/ivas_stereo_dft_enc.c | 58 ++- lib_enc/ivas_stereo_icbwe_enc.c | 77 +-- lib_enc/ivas_stereo_switching_enc.c | 6 +- lib_enc/speech_music_classif.c | 751 +--------------------------- lib_enc/speech_music_classif_fx.c | 34 +- lib_enc/stat_enc.h | 1 + lib_enc/swb_pre_proc.c | 3 +- 17 files changed, 1029 insertions(+), 1353 deletions(-) diff --git a/lib_com/ivas_qspherical_com.c b/lib_com/ivas_qspherical_com.c index 0eb7f6a1c..e4ae21079 100644 --- a/lib_com/ivas_qspherical_com.c +++ b/lib_com/ivas_qspherical_com.c @@ -50,7 +50,25 @@ * ivas_qmetadata_reorder_generic() * *------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +UWord16 ivas_qmetadata_reorder_generic( + const Word16 signed_value ) +{ + UWord16 unsigned_value; + + IF( signed_value < 0 ) + { + // unsigned_value = ( ( UWord16 ) - ( signed_value + 1 ) << 1 ) + 1; + unsigned_value = (UWord16) L_add( L_shl( negate( add( signed_value, 1 ) ), 1 ), 1 ); + } + ELSE + { + unsigned_value = (UWord16) L_shl( signed_value, 1 ); + } + return unsigned_value; +} +#else uint16_t ivas_qmetadata_reorder_generic( const int16_t signed_value ) { @@ -67,6 +85,7 @@ uint16_t ivas_qmetadata_reorder_generic( return unsigned_value; } 
+#endif /*------------------------------------------------------------------------- diff --git a/lib_enc/ivas_core_enc.c b/lib_enc/ivas_core_enc.c index bff86bc97..75395ce84 100644 --- a/lib_enc/ivas_core_enc.c +++ b/lib_enc/ivas_core_enc.c @@ -1407,12 +1407,6 @@ ivas_error ivas_core_enc( IF( EQ_16( st->element_mode, IVAS_CPE_DFT ) ) { - hStereoICBWE->mem_nrg_L[0] = hCPE->hStereoDft->nrg_L[0]; - hStereoICBWE->mem_nrg_R[0] = hCPE->hStereoDft->nrg_R[0]; - hStereoICBWE->mem_nrg_DMX[0] = hCPE->hStereoDft->nrg_DMX[0]; - hStereoICBWE->mem_nrg_L[1] = hCPE->hStereoDft->nrg_L[1]; - hStereoICBWE->mem_nrg_R[1] = hCPE->hStereoDft->nrg_R[1]; - hStereoICBWE->mem_nrg_DMX[1] = hCPE->hStereoDft->nrg_DMX[1]; hStereoICBWE->prevSpecMapping = fixedToFloat( hStereoICBWE->prevSpecMapping_fx, 31 ); IF( ( st->extl == SWB_TBE || st->extl == FB_TBE ) && st->flag_ACELP16k == 1 ) { diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 115f1aae3..797381826 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -2792,17 +2792,9 @@ ivas_error pre_proc_front_ivas_fx( #ifdef IVAS_FLOAT_FIXED_CONVERSIONS Word16 non_staX_e, sp_div_e, epsP_e, max_e_Etot; floatToFixed_arr32( lsf_new, lsf_new_fx, Q15, M ); - f2me( hStereoClassif->ratio_L, &hStereoClassif->ratio_L_fx, &hStereoClassif->ratio_L_e ); - f2me_buf( hStereoClassif->voicing_ch1, hStereoClassif->voicing_ch1_fx, &hStereoClassif->voicing_ch1_e, 3 ); f2me_buf_16( st->voicing, st->voicing_fx, &st->voicing_e, 3 ); f2me( non_staX, &non_staX_fx, &non_staX_e ); f2me_buf( epsP, epsP_fx, &epsP_e, 17 ); - f2me( hStereoClassif->non_sta_ch1, &hStereoClassif->non_sta_ch1_fx, &hStereoClassif->non_sta_ch1_e ); - f2me( hStereoClassif->sp_div_ch1, &hStereoClassif->sp_div_ch1_fx, &hStereoClassif->sp_div_ch1_e ); - f2me( hStereoClassif->ps_diff_ch1, &hStereoClassif->ps_diff_ch1_fx, &hStereoClassif->ps_diff_ch1_e ); - f2me( hStereoClassif->ps_sta_ch1, &hStereoClassif->ps_sta_ch1_fx, 
&hStereoClassif->ps_sta_ch1_e ); - f2me( hStereoClassif->ps_diff_ch2, &hStereoClassif->ps_diff_ch2_fx, &hStereoClassif->ps_diff_ch2_e ); - f2me( hStereoClassif->ps_sta_ch2, &hStereoClassif->ps_sta_ch2_fx, &hStereoClassif->ps_sta_ch2_e ); f2me( sp_div, &sp_div_fx, &sp_div_e ); #endif // IVAS_FLOAT_FIXED_CONVERSIONS max_e_Etot = max( hStereoClassif->e_Etot_buf_fx, max( hStereoClassif->Etot_up_e, hStereoClassif->Etot_dn_e ) ); @@ -2813,15 +2805,7 @@ ivas_error pre_proc_front_ivas_fx( stereo_classifier_features_ivas_fx( hStereoClassif, st->idchan, element_mode, localVAD_HE_SAD, lsf_new_fx, epsP_fx, st->pitch, st->voicing_fx, cor_map_sum_fx, non_staX_fx, sp_div_fx, st->clas, epsP_e, st->voicing_e, cor_map_sum_e, non_staX_e, sp_div_e ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - me2f_buf( hStereoClassif->voicing_ch1_fx, hStereoClassif->voicing_ch1_e, hStereoClassif->voicing_ch1, 3 ); - hStereoClassif->non_sta_ch1 = me2f( hStereoClassif->non_sta_ch1_fx, hStereoClassif->non_sta_ch1_e ); - hStereoClassif->sp_div_ch1 = me2f( hStereoClassif->sp_div_ch1_fx, hStereoClassif->sp_div_ch1_e ); - hStereoClassif->ps_diff_ch1 = me2f( hStereoClassif->ps_diff_ch1_fx, hStereoClassif->ps_diff_ch1_e ); - hStereoClassif->ps_sta_ch1 = me2f( hStereoClassif->ps_sta_ch1_fx, hStereoClassif->ps_sta_ch1_e ); - hStereoClassif->ps_diff_ch2 = me2f( hStereoClassif->ps_diff_ch2_fx, hStereoClassif->ps_diff_ch2_e ); - hStereoClassif->ps_sta_ch2 = me2f( hStereoClassif->ps_sta_ch2_fx, hStereoClassif->ps_sta_ch2_e ); -#endif + #else stereo_classifier_features( hStereoClassif, st->idchan, element_mode, localVAD_HE_SAD, lsf_new, epsP, st->pitch, st->voicing, *cor_map_sum, non_staX, sp_div, st->clas ); #endif // IVAS_FLOAT_FIXED @@ -2842,7 +2826,7 @@ ivas_error pre_proc_front_ivas_fx( Word16 non_sta_fx = float_to_fix16( non_staX, Q6 ); Word16 Etot_fx_0 = float_to_fix16( Etot, Q8 ); floatToFixed_arr( lsp_new, lsp_new_fx, Q15, M ); - hSpMusClas->wdlp_0_95_sp_fx = float_to_fix16( hSpMusClas->wdlp_0_95_sp, Q8 ); + 
hSpMusClas->wdlp_0_95_sp_32fx = float_to_fix( hSpMusClas->wdlp_0_95_sp, Q24 ); hSpMusClas->wdlp_xtalk_fx = floatToFixed( hSpMusClas->wdlp_xtalk, Q19 ); hSpMusClas->wrise_fx = float_to_fix16( hSpMusClas->wrise, 9 ); relE_fx = float_to_fix16( *relE, 8 ); @@ -2872,7 +2856,7 @@ ivas_error pre_proc_front_ivas_fx( hSpMusClas->wdrop = fixedToFloat( hSpMusClas->wdrop_fx, Q9 ); // Q8 hSpMusClas->wrise = fixedToFloat( hSpMusClas->wrise_fx, Q9 ); // Q8 hSpMusClas->lt_dec_thres = fixedToFloat( hSpMusClas->lt_dec_thres_fx, Q9 ); // Q8 - hSpMusClas->wdlp_0_95_sp = fixedToFloat( hSpMusClas->wdlp_0_95_sp_fx, Q8 ); + hSpMusClas->wdlp_0_95_sp = fixedToFloat( hSpMusClas->wdlp_0_95_sp_32fx, Q24 ); hSpMusClas->dlp_mean_LT = fixedToFloat_32( hSpMusClas->dlp_mean_LT_fx, Q19 ); hSpMusClas->wdlp_xtalk = fixedToFloat( hSpMusClas->wdlp_xtalk_fx, Q19 ); hSpMusClas->dlp_var_LT = fixedToFloat_32( hSpMusClas->dlp_var_LT_fx, Q19 ); @@ -2884,14 +2868,6 @@ ivas_error pre_proc_front_ivas_fx( fixedToFloat_arrL32( hSpMusClas->prev_FV_fx, hSpMusClas->prev_FV, Q20, 15 ); fixedToFloat_arrL32( hSpMusClas->past_dlp_mean_ST_fx, hSpMusClas->past_dlp_mean_ST, Q19, 7 ); hSpMusClas->dlp_mean_ST = fixedToFloat( hSpMusClas->dlp_mean_ST_fx, Q19 ); - - if ( hStereoClassif != NULL ) - { - hStereoClassif->ps_diff_ch1 = me2f( hStereoClassif->ps_diff_ch1_fx, hStereoClassif->ps_diff_ch1_e ); // Qfact_PS_past - 7 - hStereoClassif->ps_sta_ch1 = me2f( hStereoClassif->ps_sta_ch1_fx, hStereoClassif->ps_sta_ch1_e ); // logf( ps_sta + 1e-5f );Q25 - hStereoClassif->ps_diff_ch2 = me2f( hStereoClassif->ps_diff_ch2_fx, hStereoClassif->ps_diff_ch2_e ); - hStereoClassif->ps_sta_ch2 = me2f( hStereoClassif->ps_sta_ch2_fx, hStereoClassif->ps_sta_ch2_e ); - } #endif #endif diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index 1f37cc90d..0370c37e1 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -211,16 +211,11 @@ ivas_error ivas_cpe_enc( { #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - 
hCPE->hStereoClassif->is_speech_fx = floatToFixed_32( hCPE->hStereoClassif->is_speech, Q9 ); hCPE->hCoreCoder[0]->hSpMusClas->past_dlp_fx[0] = float_to_fix16( hCPE->hCoreCoder[0]->hSpMusClas->past_dlp[0], Q9 ); hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk_fx = floatToFixed( hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk, Q19 ); #endif hCPE->element_mode = select_stereo_mode( hCPE, ivas_format ); - -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - hCPE->hStereoClassif->is_speech = fixedToFloat_32( hCPE->hStereoClassif->is_speech_fx, Q9 ); -#endif #else hCPE->element_mode = select_stereo_mode( hCPE, ivas_format ); #endif @@ -700,34 +695,15 @@ ivas_error ivas_cpe_enc( floatToFixed_arr( &hCPE->hStereoDft->input_mem_itd[i][0], &hCPE->hStereoDft->input_mem_itd_fx[i][0], hCPE->hStereoDft->q_input_mem_itd[i], STEREO_DFT_OVL_MAX ); #endif // MSAN_FIX } - floatToFixed_arrL( hCPE->hStereoDft->side_gain, hCPE->hStereoDft->side_gain_fx, Q31, STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX ); - floatToFixed_arrL( hCPE->hStereoDft->gipd, hCPE->hStereoDft->gipd_fx, Q13, STEREO_DFT_ENC_DFT_NB ); - /* flt2fix end */ - - - /*flt2fix: dft_td_itd*/ - for ( i = 0; i < STEREO_DFT_N_32k_ENC; i++ ) - { - f2me( hCPE->hStereoDft->xspec_smooth[i], &hCPE->hStereoDft->xspec_smooth_fx[i], &hCPE->hStereoDft->xspec_smooth_fx_e[i] ); - } f2me_buf( hCPE->hStereoDft->Spd_L_smooth, hCPE->hStereoDft->Spd_L_smooth_fx, &hCPE->hStereoDft->Spd_L_smooth_fx_e, STEREO_DFT_N_32k_ENC / 2 ); f2me_buf( hCPE->hStereoDft->Spd_R_smooth, hCPE->hStereoDft->Spd_R_smooth_fx, &hCPE->hStereoDft->Spd_R_smooth_fx_e, STEREO_DFT_N_32k_ENC / 2 ); + floatToFixed_arrL( hCPE->hStereoDft->hItd->itd, hCPE->hStereoDft->hItd->itd_fx, 16, STEREO_DFT_ENC_DFT_NB ); floatToFixed_arrL( hCPE->hStereoClassif->unclr_fv, hCPE->hStereoClassif->unclr_fv_fx, 15, SSC_MAX_NFEA ); #ifndef MSAN_FIX hCPE->hStereoClassif->xtalk_score_fx = floatToFixed( hCPE->hStereoClassif->xtalk_score, 31 ); #endif // !MSAN_FIX - /*flt2fix: dft_compute_prm*/ - floatToFixed_arrL( 
hCPE->hStereoDft->sidSideGain, hCPE->hStereoDft->sidSideGain_fx, Q31, STEREO_DFT_ERB4_BANDS ); - hCPE->hStereoDft->sid_gipd_fx = floatToFixed( hCPE->hStereoDft->sid_gipd, Q13 ); - - for ( i = 0; i < 2; i++ ) - { - f2me( hCPE->hStereoDft->nrg_L[i], &hCPE->hStereoDft->nrg_L_fx[i], &hCPE->hStereoDft->nrg_L_fx_e[i] ); - f2me( hCPE->hStereoDft->nrg_R[i], &hCPE->hStereoDft->nrg_R_fx[i], &hCPE->hStereoDft->nrg_R_fx_e[i] ); - f2me( hCPE->hStereoDft->nrg_DMX[i], &hCPE->hStereoDft->nrg_DMX_fx[i], &hCPE->hStereoDft->nrg_DMX_fx_e[i] ); - } for ( i = 0; i < STEREO_DFT_BAND_MAX; i++ ) { f2me( hCPE->hStereoDft->res_cod_NRG_M[i], &hCPE->hStereoDft->res_cod_NRG_M_fx[i], &hCPE->hStereoDft->res_cod_NRG_M_fx_e[i] ); @@ -751,6 +727,7 @@ ivas_error ivas_cpe_enc( floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k, hCPE->hStereoDft->output_mem_dmx_16k_fx, 16, STEREO_DFT_OVL_16k ); floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb, hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, 16, STEREO_DFT_OVL_16k ); floatToFixed_arrL( hCPE->hStereoDft->output_mem_dmx_32k, hCPE->hStereoDft->output_mem_dmx_32k_fx, 16, STEREO_DFT_OVL_32k ); + /*flt2fix end*/ #endif #ifdef IVAS_FLOAT_FIXED @@ -877,18 +854,18 @@ ivas_error ivas_cpe_enc( hCPE->hStereoTD->tdm_last_ratio_fx = floatToFixed( hCPE->hStereoTD->tdm_last_ratio, Q31 ); hCPE->hStereoTD->tdm_last_ratio_SM_fx = floatToFixed( hCPE->hStereoTD->tdm_last_ratio_SM, Q31 ); #endif - Word16 tmp; + Word16 tdm_SM_flag; IF( hCPE->hStereoTD->tdm_LRTD_flag == 0 ) { - tmp = tdm_SM_or_LRTD_Pri; + tdm_SM_flag = tdm_SM_or_LRTD_Pri; move16(); } ELSE { - tmp = 0; + tdm_SM_flag = 0; move16(); } - stereo_tdm_downmix_ivas_fx( hCPE->hStereoTD, input_fx[0], input_fx[1], input_frame, tdm_ratio_idx, tmp, tdm_ratio_idx_SM ); + stereo_tdm_downmix_ivas_fx( hCPE->hStereoTD, input_fx[0], input_fx[1], input_frame, tdm_ratio_idx, tdm_SM_flag, tdm_ratio_idx_SM ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS hCPE->hStereoTD->tdm_last_ratio = fixedToFloat( 
hCPE->hStereoTD->tdm_last_ratio_fx, Q31 ); @@ -1008,48 +985,20 @@ ivas_error ivas_cpe_enc( #ifdef IVAS_FLOAT_FIXED_CONVERSIONS Word16 i; - /* fix2flt: to be removed */ - fixedToFloat_arrL( hCPE->hStereoDft->side_gain_fx, hCPE->hStereoDft->side_gain, Q31, STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX ); - fixedToFloat_arrL( hCPE->hStereoDft->gipd_fx, hCPE->hStereoDft->gipd, Q13, STEREO_DFT_ENC_DFT_NB ); FOR( i = 0; i < CPE_CHANNELS; i++ ) { - // fixedToFloat_arr( sts[i]->input_fx, sts[i]->input, sts[i]->q_inp, 1965 ); - // fixedToFloat_arr(sts[i]->old_input_signal_fx, sts[i]->old_input_signal, sts[i]->q_old_inp, 1965); fixedToFloat_arr( &hCPE->input_mem_fx[i][0], &hCPE->input_mem[i][0], hCPE->q_input_mem[i], NS2SA( input_Fs, STEREO_DFT_OVL_NS ) ); me2f_buf( hCPE->hStereoDft->DFT_fx[i], hCPE->hStereoDft->DFT_fx_e[i], hCPE->hStereoDft->DFT[i], STEREO_DFT_N_MAX_ENC ); -#ifdef MSAN_FIX - IF( hCPE->hStereoDft->hItd->td_itd[STEREO_DFT_OFFSET] != 0 && EQ_16( sts[0]->element_mode, IVAS_CPE_DFT ) ) - { - fixedToFloat_arr( &hCPE->hStereoDft->input_mem_itd_fx[i][0], &hCPE->hStereoDft->input_mem_itd[i][0], hCPE->hStereoDft->q_input_mem_itd[i], hCPE->hStereoDft->dft_ovl ); - } -#else - fixedToFloat_arr( &hCPE->hStereoDft->input_mem_itd_fx[i][0], &hCPE->hStereoDft->input_mem_itd[i][0], hCPE->hStereoDft->q_input_mem_itd[i], STEREO_DFT_OVL_MAX ); -#endif // MSAN_FIX - } - /* fix2flt end */ - - - /*fix2flt: dft_td_itd*/ - for ( i = 0; i < STEREO_DFT_N_32k_ENC; i++ ) - { - hCPE->hStereoDft->xspec_smooth[i] = me2f( hCPE->hStereoDft->xspec_smooth_fx[i], hCPE->hStereoDft->xspec_smooth_fx_e[i] ); } me2f_buf( hCPE->hStereoDft->Spd_L_smooth_fx, hCPE->hStereoDft->Spd_L_smooth_fx_e, hCPE->hStereoDft->Spd_L_smooth, STEREO_DFT_N_32k_ENC / 2 ); me2f_buf( hCPE->hStereoDft->Spd_R_smooth_fx, hCPE->hStereoDft->Spd_R_smooth_fx_e, hCPE->hStereoDft->Spd_R_smooth, STEREO_DFT_N_32k_ENC / 2 ); + fixedToFloat_arrL( hCPE->hStereoDft->hItd->itd_fx, hCPE->hStereoDft->hItd->itd, 16, STEREO_DFT_ENC_DFT_NB ); 
fixedToFloat_arrL( hCPE->hStereoClassif->unclr_fv_fx, hCPE->hStereoClassif->unclr_fv, 15, SSC_MAX_NFEA ); - fixedToFloat_arrL( hCPE->hStereoDft->sidSideGain_fx, hCPE->hStereoDft->sidSideGain, Q31, STEREO_DFT_ERB4_BANDS ); - hCPE->hStereoDft->sid_gipd = fixedToFloat( hCPE->hStereoDft->sid_gipd_fx, Q13 ); /*local fix2flt*/ - for ( i = 0; i < 2; i++ ) - { - hCPE->hStereoDft->nrg_L[i] = me2f( hCPE->hStereoDft->nrg_L_fx[i], hCPE->hStereoDft->nrg_L_fx_e[i] ); - hCPE->hStereoDft->nrg_R[i] = me2f( hCPE->hStereoDft->nrg_R_fx[i], hCPE->hStereoDft->nrg_R_fx_e[i] ); - hCPE->hStereoDft->nrg_DMX[i] = me2f( hCPE->hStereoDft->nrg_DMX_fx[i], hCPE->hStereoDft->nrg_DMX_fx_e[i] ); - } for ( i = 0; i < STEREO_DFT_BAND_MAX; i++ ) { hCPE->hStereoDft->res_cod_NRG_M[i] = me2f( hCPE->hStereoDft->res_cod_NRG_M_fx[i], hCPE->hStereoDft->res_cod_NRG_M_fx_e[i] ); @@ -1073,6 +1022,7 @@ ivas_error ivas_cpe_enc( fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_fx, hCPE->hStereoDft->output_mem_dmx_16k, 16, STEREO_DFT_OVL_16k ); fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_16k_shb_fx, hCPE->hStereoDft->output_mem_dmx_16k_shb, 16, STEREO_DFT_OVL_16k ); fixedToFloat_arrL( hCPE->hStereoDft->output_mem_dmx_32k_fx, hCPE->hStereoDft->output_mem_dmx_32k, 16, STEREO_DFT_OVL_32k ); + /*fix2flt end*/ #endif #else @@ -1410,21 +1360,7 @@ ivas_error ivas_cpe_enc( } ELSE { -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - /*flt2fix: dft_td_itd*/ - for ( Word16 i = 0; i < STEREO_DFT_N_32k_ENC; i++ ) - { - f2me( hCPE->hStereoDft->xspec_smooth[i], &hCPE->hStereoDft->xspec_smooth_fx[i], &hCPE->hStereoDft->xspec_smooth_fx_e[i] ); - } -#endif stereo_dft_enc_write_BS_fx( hCPE, &nb_bits ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - /*fix2flt: dft_td_itd*/ - for ( Word16 i = 0; i < STEREO_DFT_N_32k_ENC; i++ ) - { - hCPE->hStereoDft->xspec_smooth[i] = me2f( hCPE->hStereoDft->xspec_smooth_fx[i], hCPE->hStereoDft->xspec_smooth_fx_e[i] ); - } -#endif } /* Residual coding in MDCT domain */ @@ -2580,6 +2516,9 @@ ivas_error 
create_cpe_enc( } stereo_icBWE_init_enc( hCPE->hStereoICBWE ); +#ifdef IVAS_FLOAT_FIXED + stereo_icBWE_init_enc_fx( hCPE->hStereoICBWE ); +#endif } /*-----------------------------------------------------------------* diff --git a/lib_enc/ivas_front_vad.c b/lib_enc/ivas_front_vad.c index 1818b6fe5..ea8c1a5c6 100644 --- a/lib_enc/ivas_front_vad.c +++ b/lib_enc/ivas_front_vad.c @@ -1274,7 +1274,7 @@ ivas_error front_vad_spar( // Word32 epsP_fx[M + 1]; Word16 Etot_fx_0 = float_to_fix16( Etot[0], Q8 ); floatToFixed_arr( lsp_new, lsp_new_fx, Q15, M ); - hSpMusClas->wdlp_0_95_sp_fx = float_to_fix16( hSpMusClas->wdlp_0_95_sp, Q8 ); + hSpMusClas->wdlp_0_95_sp_32fx = float_to_fix( hSpMusClas->wdlp_0_95_sp, Q24 ); hSpMusClas->wdlp_xtalk_fx = floatToFixed( hSpMusClas->wdlp_xtalk, Q19 ); hSpMusClas->wrise_fx = float_to_fix16( hSpMusClas->wrise, 9 ); relE_fx = float_to_fix16( relE, 8 ); @@ -1293,17 +1293,18 @@ ivas_error front_vad_spar( hSpMusClas->dlp_mean_ST_fx = float_to_fix( hSpMusClas->dlp_mean_ST, Q19 ); floatToFixed_arr32( hSpMusClas->past_dlp_mean_ST, hSpMusClas->past_dlp_mean_ST_fx, Q19, 7 ); floatToFixed_arr32( hSpMusClas->prev_FV, hSpMusClas->prev_FV_fx, Q20, 15 ); + floatToFixed_arrL( hSpMusClas->FV_st, hSpMusClas->FV_st_fx, Q20, 15 ); #endif ivas_smc_gmm_fx( st, NULL, localVAD_HE_SAD[0], Etot_fx_0, lsp_new_fx, cor_map_sum_fx, epsP_fx, PS_fx, non_sta_fx, relE_fx, &high_lpn_flag, flag_spitch, Qfact_PS, Q_esp, Qfact_PS_past ); #if 1 fixedToFloat_arr( hSpMusClas->past_dlp_fx, hSpMusClas->past_dlp, Q9, HANG_LEN - 1 ); - hSpMusClas->lpm = fixedToFloat( hSpMusClas->lpm_fx, Q7 ); // Q7 - hSpMusClas->lps = fixedToFloat( hSpMusClas->lps_fx, Q7 ); // Q7 - hSpMusClas->lpn = fixedToFloat( hSpMusClas->lpn_fx, Q7 ); // Q7 - hSpMusClas->wdrop = fixedToFloat( hSpMusClas->wdrop_fx, Q9 ); // Q8 - hSpMusClas->wrise = fixedToFloat( hSpMusClas->wrise_fx, Q9 ); // Q8 - hSpMusClas->lt_dec_thres = fixedToFloat( hSpMusClas->lt_dec_thres_fx, Q9 ); // Q8 - hSpMusClas->wdlp_0_95_sp = 
fixedToFloat( hSpMusClas->wdlp_0_95_sp_fx, Q8 ); + hSpMusClas->lpm = fixedToFloat( hSpMusClas->lpm_fx, Q7 ); // Q7 + hSpMusClas->lps = fixedToFloat( hSpMusClas->lps_fx, Q7 ); // Q7 + hSpMusClas->lpn = fixedToFloat( hSpMusClas->lpn_fx, Q7 ); // Q7 + hSpMusClas->wdrop = fixedToFloat( hSpMusClas->wdrop_fx, Q9 ); // Q9 + hSpMusClas->wrise = fixedToFloat( hSpMusClas->wrise_fx, Q9 ); // Q9 + hSpMusClas->lt_dec_thres = fixedToFloat( hSpMusClas->lt_dec_thres_fx, Q9 ); // Q9 + hSpMusClas->wdlp_0_95_sp = fixedToFloat( hSpMusClas->wdlp_0_95_sp_32fx, Q24 ); // Q24 hSpMusClas->dlp_mean_LT = fixedToFloat_32( hSpMusClas->dlp_mean_LT_fx, Q19 ); hSpMusClas->wdlp_xtalk = fixedToFloat( hSpMusClas->wdlp_xtalk_fx, Q19 ); hSpMusClas->dlp_var_LT = fixedToFloat_32( hSpMusClas->dlp_var_LT_fx, Q19 ); diff --git a/lib_enc/ivas_ism_enc.c b/lib_enc/ivas_ism_enc.c index 423bd5f98..bc66cd985 100644 --- a/lib_enc/ivas_ism_enc.c +++ b/lib_enc/ivas_ism_enc.c @@ -464,6 +464,7 @@ ivas_error ivas_ism_enc( float cor_map_sum[MAX_NUM_OBJECTS][1]; /* speech/music clasif. 
parameter */ Word16 vad_flag_dtx[MAX_NUM_OBJECTS][1]; /* HE-SAD flag with additional DTX HO */ float enerBuffer[MAX_NUM_OBJECTS][1][CLDFB_NO_CHANNELS_MAX]; /* energy buffer */ + Word16 currFlatness_fx[1]; /* flatness parameter */ float currFlatness[1]; /* flatness parameter */ float fft_buff[MAX_NUM_OBJECTS][1][2 * L_FFT]; /* FFT buffer */ Word16 fft_buff_fx[MAX_NUM_OBJECTS][1][2 * L_FFT]; /* FFT buffer */ @@ -555,7 +556,18 @@ ivas_error ivas_ism_enc( *---------------------------------------------------------------*/ RunTransientDetection( st->input, input_frame, st->hTranDet ); +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + Word16 e_tmp; + f2me_buf_16( st->hTranDet->transientDetector.pSubblockEnergies->subblockNrgChange_flt, st->hTranDet->transientDetector.pSubblockEnergies->subblockNrgChange, &e_tmp, 24 ); +#endif + currFlatness_fx[0] = GetTCXAvgTemporalFlatnessMeasure_fx( (const TransientDetection *) st->hTranDet, NSUBBLOCKS, 0 ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + currFlatness[0] = me2f_16( currFlatness_fx[0], e_tmp ); +#endif +#else currFlatness[0] = GetTCXAvgTemporalFlatnessMeasure( st->hTranDet, NSUBBLOCKS, 0 ); +#endif /*----------------------------------------------------------------* * Configuration of core encoder @@ -596,28 +608,53 @@ ivas_error ivas_ism_enc( * DTX analysis *-----------------------------------------------------------------*/ - IF( st_ivas->hEncoderConfig->Opt_DTX_ON ) +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + IF( st_ivas->hIsmMetaData != NULL ) { - /* compute the dominant sce_id using long term energy */ -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS /************************flt_to_fix****************************/ - for ( int j = 0; j < st_ivas->nchan_transport; j++ ) - { - floatToFixed_arrL( st_ivas->hSCE[j]->hCoreCoder[0]->input, st_ivas->hSCE[j]->hCoreCoder[0]->input32_fx, Q11, input_frame ); /*Q0*/ - } - f2me_buf( &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc[0][0], 
&st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc_fx[0][0], &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc_e, st_ivas->nchan_transport * PARAM_ISM_HYS_BUF_SIZE ); - for ( int ch = 0; ch < nchan_ism; ch++ ) - { - st_ivas->hIsmMetaData[ch]->azimuth_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->last_azimuth_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->last_azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->elevation_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->elevation, Q22 ); - st_ivas->hIsmMetaData[ch]->last_elevation_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->last_elevation, Q22 ); - } - for ( int ch = 0; ch < st_ivas->nchan_transport; ch++ ) + FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) { - st_ivas->hSCE[ch]->hCoreCoder[0]->lp_noise_fx = float_to_fix16( st_ivas->hSCE[ch]->hCoreCoder[0]->lp_noise, Q8 ); /*Q8*/ + st_ivas->hIsmMetaData[ch]->azimuth_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->azimuth, Q22 ); + st_ivas->hIsmMetaData[ch]->elevation_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->elevation, Q22 ); + st_ivas->hIsmMetaData[ch]->yaw_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->yaw, Q22 ); + st_ivas->hIsmMetaData[ch]->pitch_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->pitch, Q22 ); + st_ivas->hIsmMetaData[ch]->radius_fx = float_to_fix16( st_ivas->hIsmMetaData[ch]->radius, Q9 ); + st_ivas->hIsmMetaData[ch]->last_true_azimuth_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->last_true_azimuth, Q22 ); + st_ivas->hIsmMetaData[ch]->last_true_elevation_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->last_true_elevation, Q22 ); + st_ivas->hIsmMetaData[ch]->last_true_radius_fx = float_to_fix16( st_ivas->hIsmMetaData[ch]->last_true_radius, Q9 ); } + } + if ( st_ivas->hISMDTX ) + { floatToFixed_arr16( st_ivas->hISMDTX->coh, st_ivas->hISMDTX->coh_fx, Q15, st_ivas->nchan_transport ); -#endif /****************************ends here*********************************/ + } + for ( int ch = 0; ch < 
st_ivas->nchan_transport; ch++ ) + { + IF( st_ivas->hSCE[ch] ) + st_ivas->hSCE[ch]->hCoreCoder[0]->lp_noise_fx = float_to_fix16( st_ivas->hSCE[ch]->hCoreCoder[0]->lp_noise, Q8 ); /*Q8*/ + } + + /* compute the dominant sce_id using long term energy */ + for ( int j = 0; j < st_ivas->nchan_transport; j++ ) + { + IF( st_ivas->hSCE[j] && st_ivas->hSCE[j]->hCoreCoder[0] ) + floatToFixed_arrL( st_ivas->hSCE[j]->hCoreCoder[0]->input, st_ivas->hSCE[j]->hCoreCoder[0]->input32_fx, Q11, input_frame ); /*Q0*/ + } + IF( st_ivas->hISMDTX ) + f2me_buf( &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc[0][0], &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc_fx[0][0], &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc_e, st_ivas->nchan_transport * PARAM_ISM_HYS_BUF_SIZE ); + + IF( st_ivas->hMasa != NULL ) + { + st_ivas->hMasa->data.hOmasaData->lp_noise_CPE_fx = floatToFixed( st_ivas->hMasa->data.hOmasaData->lp_noise_CPE, Q8 ); + } + + FOR( sce_id = 0; sce_id < nchan_transport_ism; sce_id++ ) + { + relE_fx[sce_id][0] = float_to_fix16( relE[sce_id][0], Q8 ); + } +#endif + + IF( st_ivas->hEncoderConfig->Opt_DTX_ON ) + { ivas_ism_get_sce_id_dtx_fx( st_ivas->hISMDTX, st_ivas->hSCE, st_ivas->nchan_transport, input_frame ); dtx_flag = ivas_ism_dtx_enc_fx( st_ivas->hISMDTX, st_ivas->hSCE, st_ivas->hEncoderConfig->ivas_total_brate, nchan_ism, st_ivas->nchan_transport, vad_flag, st_ivas->hIsmMetaData, md_diff_flag, &sid_flag ); @@ -627,10 +664,6 @@ ivas_error ivas_ism_enc( /* estimate coherence between objects */ ivas_ism_coh_estim_dtx_enc_fx( st_ivas->hISMDTX, st_ivas->hSCE, st_ivas->nchan_transport, input_frame ); } -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS /************************fix_to_flt****************************/ - me2f_buf( &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc_fx[0][0], st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc_e, &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc[0][0], st_ivas->nchan_transport * PARAM_ISM_HYS_BUF_SIZE ); - fixedToFloat_arr( 
st_ivas->hISMDTX->coh_fx, st_ivas->hISMDTX->coh, Q15, st_ivas->nchan_transport ); -#endif /****************************ends here*********************************/ } /*------------------------------------------------------------------* @@ -647,353 +680,195 @@ ivas_error ivas_ism_enc( IF( dtx_flag ) { -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - /*================flt-to-fix===================================*/ - if ( sid_flag ) - { - if ( GT_16( st_ivas->nchan_transport, 1 ) ) - { - /* write sce id */ - /* quantize and write coherence */ - floatToFixed_arr16( st_ivas->hISMDTX->coh, st_ivas->hISMDTX->coh_fx, Q15, st_ivas->nchan_transport ); - } - } - FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) - { - st_ivas->hIsmMetaData[ch]->azimuth_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->elevation_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->elevation, Q22 ); - } - /*================flt-to-fix===================================*/ -#endif ivas_ism_metadata_sid_enc( st_ivas->hISMDTX, flag_noisy_speech, nchan_ism, st_ivas->nchan_transport, st_ivas->ism_mode, st_ivas->hIsmMetaData, sid_flag, md_diff_flag, st_ivas->hSCE[st_ivas->nSCE - 1]->hMetaData, nb_bits_metadata ); } - ELSE IF( EQ_32( st_ivas->ism_mode, ISM_MODE_PARAM ) ){ -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - IF( NE_16( st_ivas->ism_mode, ISM_MASA_MODE_DISC ) || NE_16( st_ivas->ism_mode, ISM_MASA_MODE_MASA_ONE_OBJ ) ){ - FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ){ - IF( EQ_16( st_ivas->ism_mode, ISM_MODE_DISC ) || EQ_16( st_ivas->ism_mode, ISM_MASA_MODE_DISC ) || EQ_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ){ - IF( EQ_16( st_ivas->hIsmMetaData[ch]->ism_metadata_flag, 1 ) ){ - IF( NE_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ){ - st_ivas->hSCE[ch]->hCoreCoder[0]->lp_noise_fx = float_to_fix16( st_ivas->hSCE[ch]->hCoreCoder[0]->lp_noise, Q8 ); -} -IF( st_ivas->hIsmMetaData[ch]->ism_metadata_flag == 0 ) -{ - 
st_ivas->hIsmMetaData[ch]->azimuth_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->last_true_azimuth_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->last_true_azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->elevation_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->elevation, Q22 ); - st_ivas->hIsmMetaData[ch]->last_true_elevation_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->last_true_elevation, Q22 ); - st_ivas->hIsmMetaData[ch]->radius_fx = float_to_fix16( st_ivas->hIsmMetaData[ch]->radius, Q9 ); - st_ivas->hIsmMetaData[ch]->last_true_radius_fx = float_to_fix16( st_ivas->hIsmMetaData[ch]->last_true_radius, Q9 ); -} -} -} -} -} -IF( st_ivas->hIsmMetaData != NULL ) -{ - FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) + ELSE IF( EQ_32( st_ivas->ism_mode, ISM_MODE_PARAM ) ) { - st_ivas->hIsmMetaData[ch]->azimuth_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->elevation_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->elevation, Q22 ); - st_ivas->hIsmMetaData[ch]->yaw_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->yaw, Q22 ); - st_ivas->hIsmMetaData[ch]->pitch_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->pitch, Q22 ); - st_ivas->hIsmMetaData[ch]->radius_fx = float_to_fix16( st_ivas->hIsmMetaData[ch]->radius, Q9 ); - } -} -#endif - -IF( NE_32( ( error = ivas_ism_metadata_enc( &st_ivas->hEncoderConfig->ivas_total_brate, nchan_ism, nchan_transport_ism, st_ivas->hIsmMetaData, st_ivas->hSCE, st_ivas->hSCE[st_ivas->nSCE - 1]->hMetaData, nb_bits_metadata, vad_flag, st_ivas->ism_mode, st_ivas->hParamIsm, st_ivas->hEncoderConfig->ism_extended_metadata_flag, L_negate( ONE_IN_Q8 ), 0, NULL, st_ivas->hSCE[0]->hCoreCoder[0]->ini_frame ) ), IVAS_ERR_OK ) ) -{ - return error; -} - -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS -IF( st_ivas->hIsmMetaData != NULL ) -{ - FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) - { - st_ivas->hIsmMetaData[ch]->last_true_azimuth = fix_to_float( 
st_ivas->hIsmMetaData[ch]->last_true_azimuth_fx, Q22 ); - st_ivas->hIsmMetaData[ch]->last_true_elevation = fix_to_float( st_ivas->hIsmMetaData[ch]->last_true_elevation_fx, Q22 ); - st_ivas->hIsmMetaData[ch]->last_true_radius = fix16_to_float( st_ivas->hIsmMetaData[ch]->last_true_radius_fx, Q9 ); - } -} -#endif -} -ELSE /* ISM_MODE_DISC */ -{ - test(); - IF( EQ_32( st_ivas->ism_mode, ISM_MASA_MODE_DISC ) || EQ_32( st_ivas->ism_mode, ISM_MASA_MODE_MASA_ONE_OBJ ) ) - { - ism_total_brate = 0; - move16(); - FOR( i = 0; i < st_ivas->nSCE; i++ ) + IF( NE_32( ( error = ivas_ism_metadata_enc( &st_ivas->hEncoderConfig->ivas_total_brate, nchan_ism, nchan_transport_ism, st_ivas->hIsmMetaData, st_ivas->hSCE, st_ivas->hSCE[st_ivas->nSCE - 1]->hMetaData, nb_bits_metadata, vad_flag, st_ivas->ism_mode, st_ivas->hParamIsm, st_ivas->hEncoderConfig->ism_extended_metadata_flag, L_negate( ONE_IN_Q8 ), 0, NULL, st_ivas->hSCE[0]->hCoreCoder[0]->ini_frame ) ), IVAS_ERR_OK ) ) { - ism_total_brate = L_add( ism_total_brate, st_ivas->hSCE[i]->element_brate ); + return error; } } - ELSE - { - ism_total_brate = st_ivas->hEncoderConfig->ivas_total_brate; - move32(); - } - - ism_total_brate_ref = ism_total_brate; - move32(); - -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - IF( st_ivas->hMasa != NULL ) - { - st_ivas->hMasa->data.hOmasaData->lp_noise_CPE_fx = floatToFixed( st_ivas->hMasa->data.hOmasaData->lp_noise_CPE, Q8 ); - } - - IF( NE_16( st_ivas->ism_mode, ISM_MASA_MODE_DISC ) || NE_16( st_ivas->ism_mode, ISM_MASA_MODE_MASA_ONE_OBJ ) ) + ELSE /* ISM_MODE_DISC */ { - FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) + test(); + IF( EQ_32( st_ivas->ism_mode, ISM_MASA_MODE_DISC ) || EQ_32( st_ivas->ism_mode, ISM_MASA_MODE_MASA_ONE_OBJ ) ) { - IF( EQ_16( st_ivas->ism_mode, ISM_MODE_DISC ) || EQ_16( st_ivas->ism_mode, ISM_MASA_MODE_DISC ) || EQ_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) + ism_total_brate = 0; + move16(); + FOR( i = 0; i < st_ivas->nSCE; i++ ) { - IF( EQ_16( 
st_ivas->hIsmMetaData[ch]->ism_metadata_flag, 1 ) ) - { - IF( NE_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) - { - st_ivas->hSCE[ch]->hCoreCoder[0]->lp_noise_fx = float_to_fix16( st_ivas->hSCE[ch]->hCoreCoder[0]->lp_noise, Q8 ); - } - IF( st_ivas->hIsmMetaData[ch]->ism_metadata_flag == 0 ) - { - st_ivas->hIsmMetaData[ch]->azimuth_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->last_true_azimuth_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->last_true_azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->elevation_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->elevation, Q22 ); - st_ivas->hIsmMetaData[ch]->last_true_elevation_fx = float_to_fix( st_ivas->hIsmMetaData[ch]->last_true_elevation, Q22 ); - st_ivas->hIsmMetaData[ch]->radius_fx = float_to_fix16( st_ivas->hIsmMetaData[ch]->radius, Q9 ); - st_ivas->hIsmMetaData[ch]->last_true_radius_fx = float_to_fix16( st_ivas->hIsmMetaData[ch]->last_true_radius, Q9 ); - } - } + ism_total_brate = L_add( ism_total_brate, st_ivas->hSCE[i]->element_brate ); } } - } - IF( st_ivas->hIsmMetaData != NULL ) - { - FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) + ELSE { - st_ivas->hIsmMetaData[ch]->azimuth_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->elevation_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->elevation, Q22 ); - st_ivas->hIsmMetaData[ch]->yaw_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->yaw, Q22 ); - st_ivas->hIsmMetaData[ch]->pitch_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->pitch, Q22 ); - st_ivas->hIsmMetaData[ch]->radius_fx = float_to_fix16( st_ivas->hIsmMetaData[ch]->radius, Q9 ); + ism_total_brate = st_ivas->hEncoderConfig->ivas_total_brate; + move32(); } - } -#endif - IF( NE_32( ( error = ivas_ism_metadata_enc( &ism_total_brate, nchan_ism, nchan_transport_ism, st_ivas->hIsmMetaData, st_ivas->hSCE, st_ivas->hSCE[st_ivas->nSCE - 1]->hMetaData, - nb_bits_metadata, vad_flag, st_ivas->ism_mode, NULL, 
st_ivas->hEncoderConfig->ism_extended_metadata_flag, st_ivas->hMasa != NULL ? st_ivas->hMasa->data.hOmasaData->lp_noise_CPE_fx : 0, flag_omasa_ener_brate, st_ivas->hMasa != NULL ? &( st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt ) : NULL, st_ivas->hSCE[0]->hCoreCoder[0]->ini_frame ) ), - IVAS_ERR_OK ) ) - { - return error; - } + ism_total_brate_ref = ism_total_brate; + move32(); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - IF( st_ivas->hIsmMetaData != NULL ) - { - FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) + IF( NE_32( ( error = ivas_ism_metadata_enc( &ism_total_brate, nchan_ism, nchan_transport_ism, st_ivas->hIsmMetaData, st_ivas->hSCE, st_ivas->hSCE[st_ivas->nSCE - 1]->hMetaData, + nb_bits_metadata, vad_flag, st_ivas->ism_mode, NULL, st_ivas->hEncoderConfig->ism_extended_metadata_flag, st_ivas->hMasa != NULL ? st_ivas->hMasa->data.hOmasaData->lp_noise_CPE_fx : 0, flag_omasa_ener_brate, st_ivas->hMasa != NULL ? &( st_ivas->hMasa->data.hOmasaData->omasa_stereo_sw_cnt ) : NULL, st_ivas->hSCE[0]->hCoreCoder[0]->ini_frame ) ), + IVAS_ERR_OK ) ) { - st_ivas->hIsmMetaData[ch]->last_true_azimuth = fix_to_float( st_ivas->hIsmMetaData[ch]->last_true_azimuth_fx, Q22 ); - st_ivas->hIsmMetaData[ch]->last_true_elevation = fix_to_float( st_ivas->hIsmMetaData[ch]->last_true_elevation_fx, Q22 ); - st_ivas->hIsmMetaData[ch]->last_true_radius = fix16_to_float( st_ivas->hIsmMetaData[ch]->last_true_radius_fx, Q9 ); + return error; } - } -#endif - IF( EQ_32( st_ivas->hEncoderConfig->ivas_format, MASA_ISM_FORMAT ) ) - { - st_ivas->hCPE[0]->brate_surplus = L_sub( ism_total_brate_ref, ism_total_brate ); - move32(); + IF( EQ_32( st_ivas->hEncoderConfig->ivas_format, MASA_ISM_FORMAT ) ) + { + st_ivas->hCPE[0]->brate_surplus = L_sub( ism_total_brate_ref, ism_total_brate ); + move32(); + } } -} + update_last_metadata_fx( nchan_ism, st_ivas->hIsmMetaData, md_diff_flag ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS -IF( st_ivas->hIsmMetaData != NULL ) -{ - FOR( int ch = 0; ch < 
st_ivas->hEncoderConfig->nchan_ism; ch++ ) + /*----------------------------------------------------------------* + * Write IVAS format signaling in SID frames + *----------------------------------------------------------------*/ + + st = st_ivas->hSCE[0]->hCoreCoder[0]; + + IF( sid_flag ) { - st_ivas->hIsmMetaData[ch]->azimuth_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->azimuth, Q22 ); - st_ivas->hIsmMetaData[ch]->elevation_fx = floatToFixed( st_ivas->hIsmMetaData[ch]->elevation, Q22 ); + ivas_write_format_sid_fx( st_ivas->hEncoderConfig->ivas_format, IVAS_SCE, st->hBstr ); } -} -#endif -update_last_metadata_fx( nchan_ism, st_ivas->hIsmMetaData, md_diff_flag ); -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS -IF( st_ivas->hIsmMetaData != NULL ) -{ - FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) + /*only metadata encoding is needed for this case*/ + IF( EQ_32( st_ivas->hEncoderConfig->ivas_format, SBA_ISM_FORMAT ) ) { - st_ivas->hIsmMetaData[ch]->last_azimuth = fix_to_float( st_ivas->hIsmMetaData[ch]->last_azimuth_fx, Q22 ); - st_ivas->hIsmMetaData[ch]->last_elevation = fix_to_float( st_ivas->hIsmMetaData[ch]->last_elevation_fx, Q22 ); + assert( st_ivas->ism_mode != ISM_MODE_NONE ); + return error; } -} -#endif - -/*----------------------------------------------------------------* - * Write IVAS format signaling in SID frames - *----------------------------------------------------------------*/ - -st = st_ivas->hSCE[0]->hCoreCoder[0]; - -IF( sid_flag ) -{ - ivas_write_format_sid_fx( st_ivas->hEncoderConfig->ivas_format, IVAS_SCE, st->hBstr ); -} - -/*only metadata encoding is needed for this case*/ -IF( EQ_32( st_ivas->hEncoderConfig->ivas_format, SBA_ISM_FORMAT ) ) -{ - assert( st_ivas->ism_mode != ISM_MODE_NONE ); - return error; -} - -/*------------------------------------------------------------------* - * CoreCoders encoding - *-----------------------------------------------------------------*/ -FOR( sce_id = 0; sce_id < nchan_transport_ism; sce_id++ 
) -{ - hSCE = st_ivas->hSCE[sce_id]; - st = hSCE->hCoreCoder[0]; + /*------------------------------------------------------------------* + * CoreCoders encoding + *-----------------------------------------------------------------*/ - /* update pointer to the buffer of indices of the next channel */ - IF( sce_id > 0 ) + FOR( sce_id = 0; sce_id < nchan_transport_ism; sce_id++ ) { - st->hBstr->ind_list = prev_st->hBstr->ind_list + prev_st->hBstr->nb_ind_tot; - } + hSCE = st_ivas->hSCE[sce_id]; + st = hSCE->hCoreCoder[0]; - if ( st->low_rate_mode ) - { - st->bwidth = WB; - move16(); - } + /* update pointer to the buffer of indices of the next channel */ + IF( sce_id > 0 ) + { + st->hBstr->ind_list = prev_st->hBstr->ind_list + prev_st->hBstr->nb_ind_tot; + } - /*----------------------------------------------------------------* - * Core codec configuration - *----------------------------------------------------------------*/ + if ( st->low_rate_mode ) + { + st->bwidth = WB; + move16(); + } - /* IGF reconfiguration */ - test(); - IF( NE_32( hSCE->last_element_brate, hSCE->element_brate ) || NE_32( st->last_bwidth, st->bwidth ) ) - { - Word16 igf; - igf = getIgfPresent_fx( st->element_mode, L_mult0( st->bits_frame_nominal, FRAMES_PER_SEC ), st->max_bwidth, st->rf_mode ); - IF( NE_32( ( error = IGF_Reconfig_fx( &st->hIGFEnc, igf, 0, L_mult0( st->bits_frame_nominal, FRAMES_PER_SEC ), st->max_bwidth, st->element_mode, st->rf_mode ) ), IVAS_ERR_OK ) ) + /*----------------------------------------------------------------* + * Core codec configuration + *----------------------------------------------------------------*/ + + /* IGF reconfiguration */ + test(); + IF( NE_32( hSCE->last_element_brate, hSCE->element_brate ) || NE_32( st->last_bwidth, st->bwidth ) ) { - return error; + Word16 igf; + igf = getIgfPresent_fx( st->element_mode, L_mult0( st->bits_frame_nominal, FRAMES_PER_SEC ), st->max_bwidth, st->rf_mode ); + IF( NE_32( ( error = IGF_Reconfig_fx( &st->hIGFEnc, igf, 0, 
L_mult0( st->bits_frame_nominal, FRAMES_PER_SEC ), st->max_bwidth, st->element_mode, st->rf_mode ) ), IVAS_ERR_OK ) ) + { + return error; + } } - } - /* set ACELP@12k8 / ACELP@16k flag for flexible ACELP core */ - test(); - IF( EQ_32( st->core_brate, SID_2k40 ) || st->core_brate == FRAME_NO_DATA ) - { - st->flag_ACELP16k = set_ACELP_flag_IVAS( IVAS_SCE, hSCE->element_brate, st->core_brate, 0, 0, -1, -1 ); - move16(); - } - ELSE IF( st->low_rate_mode ) - { - st->flag_ACELP16k = 0; - move16(); - } - ELSE - { - st->flag_ACELP16k = set_ACELP_flag_IVAS( IVAS_SCE, hSCE->element_brate, st->total_brate, 0, 0, -1, -1 ); - move16(); - } + /* set ACELP@12k8 / ACELP@16k flag for flexible ACELP core */ + test(); + IF( EQ_32( st->core_brate, SID_2k40 ) || st->core_brate == FRAME_NO_DATA ) + { + st->flag_ACELP16k = set_ACELP_flag_IVAS( IVAS_SCE, hSCE->element_brate, st->core_brate, 0, 0, -1, -1 ); + move16(); + } + ELSE IF( st->low_rate_mode ) + { + st->flag_ACELP16k = 0; + move16(); + } + ELSE + { + st->flag_ACELP16k = set_ACELP_flag_IVAS( IVAS_SCE, hSCE->element_brate, st->total_brate, 0, 0, -1, -1 ); + move16(); + } -#ifdef IVAS_FLOAT_FIXED_CONVERSIONS - relE_fx[sce_id][0] = float_to_fix16( relE[sce_id][0], Q8 ); -#endif - /* modify the coder_type depending on the total_brate per channel */ - coder_type_modif_ivas_fx( st, relE_fx[sce_id][0] ); + /* modify the coder_type depending on the total_brate per channel */ + coder_type_modif_ivas_fx( st, relE_fx[sce_id][0] ); - /*----------------------------------------------------------------* - * Encoder - *----------------------------------------------------------------*/ - test(); - test(); - IF( !dtx_flag || ( dtx_flag && EQ_16( sce_id, st_ivas->hISMDTX->sce_id_dtx ) ) ) - { - IF( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8[sce_id], old_inp_16k[sce_id], ener[sce_id], A[sce_id], Aw[sce_id], epsP[sce_id], lsp_new[sce_id], lsp_mid[sce_id], vad_hover_flag[sce_id], attack_flag[sce_id], realBuffer[sce_id], 
imagBuffer[sce_id], old_wsp[sce_id], loc_harm[sce_id], cor_map_sum[sce_id], vad_flag_dtx[sce_id], enerBuffer[sce_id], fft_buff[sce_id], 0, ISM_FORMAT, 0 ) ) != IVAS_ERR_OK ) + /*----------------------------------------------------------------* + * Encoder + *----------------------------------------------------------------*/ + test(); + test(); + IF( !dtx_flag || ( dtx_flag && EQ_16( sce_id, st_ivas->hISMDTX->sce_id_dtx ) ) ) { - return error; + IF( ( error = ivas_core_enc( hSCE, NULL, NULL, 1, old_inp_12k8[sce_id], old_inp_16k[sce_id], ener[sce_id], A[sce_id], Aw[sce_id], epsP[sce_id], lsp_new[sce_id], lsp_mid[sce_id], vad_hover_flag[sce_id], attack_flag[sce_id], realBuffer[sce_id], imagBuffer[sce_id], old_wsp[sce_id], loc_harm[sce_id], cor_map_sum[sce_id], vad_flag_dtx[sce_id], enerBuffer[sce_id], fft_buff[sce_id], 0, ISM_FORMAT, 0 ) ) != IVAS_ERR_OK ) + { + return error; + } } - } - /*----------------------------------------------------------------* - * Common updates - *----------------------------------------------------------------*/ + /*----------------------------------------------------------------* + * Common updates + *----------------------------------------------------------------*/ - /* update input samples buffer */ - mvr2r( st->input, st->old_input_signal, input_frame ); + /* update input samples buffer */ + mvr2r( st->input, st->old_input_signal, input_frame ); - hSCE->last_element_brate = hSCE->element_brate; - move32(); + hSCE->last_element_brate = hSCE->element_brate; + move32(); - /* Store previous attack detection flag */ - st->hTranDet->transientDetector.prev_bIsAttackPresent = st->hTranDet->transientDetector.bIsAttackPresent; - move16(); + /* Store previous attack detection flag */ + st->hTranDet->transientDetector.prev_bIsAttackPresent = st->hTranDet->transientDetector.bIsAttackPresent; + move16(); - prev_st = st; -} + prev_st = st; + } -IF( dtx_flag ) -{ - FOR( sce_id = 0; sce_id < nchan_transport_ism; sce_id++ ) + IF( dtx_flag ) { - IF( 
NE_16( sce_id, st_ivas->hISMDTX->sce_id_dtx ) ) + FOR( sce_id = 0; sce_id < nchan_transport_ism; sce_id++ ) { - st_ivas->hSCE[sce_id]->hCoreCoder[0]->last_core = st_ivas->hSCE[st_ivas->hISMDTX->sce_id_dtx]->hCoreCoder[0]->last_core; - st_ivas->hSCE[sce_id]->hCoreCoder[0]->last_core_brate = st_ivas->hSCE[st_ivas->hISMDTX->sce_id_dtx]->hCoreCoder[0]->core_brate; - st_ivas->hSCE[sce_id]->hCoreCoder[0]->last_L_frame = st_ivas->hSCE[st_ivas->hISMDTX->sce_id_dtx]->hCoreCoder[0]->last_L_frame; - move16(); - move32(); - move16(); + IF( NE_16( sce_id, st_ivas->hISMDTX->sce_id_dtx ) ) + { + st_ivas->hSCE[sce_id]->hCoreCoder[0]->last_core = st_ivas->hSCE[st_ivas->hISMDTX->sce_id_dtx]->hCoreCoder[0]->last_core; + st_ivas->hSCE[sce_id]->hCoreCoder[0]->last_core_brate = st_ivas->hSCE[st_ivas->hISMDTX->sce_id_dtx]->hCoreCoder[0]->core_brate; + st_ivas->hSCE[sce_id]->hCoreCoder[0]->last_L_frame = st_ivas->hSCE[st_ivas->hISMDTX->sce_id_dtx]->hCoreCoder[0]->last_L_frame; + move16(); + move32(); + move16(); + } } } -} -#ifdef DEBUG_MODE_INFO -if ( dtx_flag ) -{ - float tmpF; - int16_t id, n; + pop_wmops(); - n = 0; - for ( sce_id = 0; sce_id < nchan_transport_ism; sce_id++ ) +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + IF( st_ivas->hIsmMetaData != NULL ) { - if ( sce_id != st_ivas->hISMDTX->sce_id_dtx ) + FOR( int ch = 0; ch < st_ivas->hEncoderConfig->nchan_ism; ch++ ) { - st = st_ivas->hSCE[sce_id]->hCoreCoder[0]; - id = st->id_element; - - dbgwrite( &st->core, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "core", n, id, ENC ) ); - dbgwrite( &st->extl, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "extl", n, id, ENC ) ); - dbgwrite( &st->bwidth, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "bwidth", n, id, ENC ) ); - tmpF = st->total_brate / 1000.0f; - dbgwrite( &tmpF, sizeof( float ), 1, input_frame, fname( debug_dir, "total_brate", n, id, ENC ) ); - tmpF = st->core_brate / 1000.0f; - dbgwrite( &tmpF, sizeof( float ), 1, input_frame, fname( debug_dir, "core_brate", 
n, id, ENC ) ); - tmpF = st->extl_brate / 1000.0f; - dbgwrite( &tmpF, sizeof( float ), 1, input_frame, fname( debug_dir, "extl_brate", n, id, ENC ) ); - - dbgwrite( &st->coder_type, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "coder_type", n, id, ENC ) ); - dbgwrite( &st->coder_type_raw, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "coder_type_raw", n, id, ENC ) ); - dbgwrite( &st->vad_flag, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "vad_flag", n, id, ENC ) ); - dbgwrite( &st->localVAD, sizeof( int16_t ), 1, input_frame, fname( debug_dir, "localVAD", n, id, ENC ) ); + st_ivas->hIsmMetaData[ch]->last_true_azimuth = fix_to_float( st_ivas->hIsmMetaData[ch]->last_true_azimuth_fx, Q22 ); + st_ivas->hIsmMetaData[ch]->last_true_elevation = fix_to_float( st_ivas->hIsmMetaData[ch]->last_true_elevation_fx, Q22 ); + st_ivas->hIsmMetaData[ch]->last_true_radius = fix16_to_float( st_ivas->hIsmMetaData[ch]->last_true_radius_fx, Q9 ); + st_ivas->hIsmMetaData[ch]->last_azimuth = fix_to_float( st_ivas->hIsmMetaData[ch]->last_azimuth_fx, Q22 ); + st_ivas->hIsmMetaData[ch]->last_elevation = fix_to_float( st_ivas->hIsmMetaData[ch]->last_elevation_fx, Q22 ); } } -} + IF( st_ivas->hISMDTX ) + { + me2f_buf( &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc_fx[0][0], st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc_e, &st_ivas->hISMDTX->long_term_energy_stereo_dmx_enc[0][0], st_ivas->nchan_transport * PARAM_ISM_HYS_BUF_SIZE ); + fixedToFloat_arr( st_ivas->hISMDTX->coh_fx, st_ivas->hISMDTX->coh, Q15, st_ivas->nchan_transport ); + } #endif -pop_wmops(); - -return error; + return error; } #endif diff --git a/lib_enc/ivas_omasa_enc.c b/lib_enc/ivas_omasa_enc.c index b6bf286c9..ffaab15e5 100644 --- a/lib_enc/ivas_omasa_enc.c +++ b/lib_enc/ivas_omasa_enc.c @@ -58,9 +58,11 @@ static void ivas_omasa_energy_and_ratio_est( OMASA_ENC_HANDLE hOMasa, OMASA_ENCO static void ivas_omasa_dmx( float *data_in_f[], float data_out_f[][L_FRAME48k], const int16_t input_frame, const 
int16_t nchan_transport, const int16_t nchan_ism, ISM_METADATA_HANDLE hIsmMeta[], float prev_gains[][MASA_MAX_TRANSPORT_CHANNELS], const float interpolator[L_FRAME48k] ); static void computeIntensityVector_enc( const int16_t *band_grouping, float Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], float Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], const int16_t num_frequency_bands, float intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS] ); - +#ifndef IVAS_FLOAT_FIXED static void computeReferencePower_omasa( const int16_t *band_grouping, float Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], float Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], float *reference_power, const int16_t enc_param_start_band, const int16_t num_freq_bands ); - +#else +static void computeReferencePower_omasa_ivas_fx( const Word16 *band_grouping, Word32 Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], Word32 Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], Word32 *reference_power, const Word16 enc_param_start_band, const Word16 num_freq_bands, Word16 *ref_exp ); +#endif /*--------------------------------------------------------------------------* * ivas_omasa_enc_open() * @@ -880,6 +882,10 @@ static void ivas_omasa_param_est_enc( const int16_t nchan_ism ) { float reference_power[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; +#ifdef IVAS_FLOAT_FIXED + Word32 reference_power_fx[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX]; // Q(31-ref_exp) + Word16 ref_exp; +#endif int16_t ts, i, j, d, k; int16_t num_freq_bins, num_freq_bands, index; float dir_v[DIRAC_NUM_DIMS]; @@ -893,6 +899,10 @@ static void ivas_omasa_param_est_enc( float Chnl_ImagBuffer[MCMASA_MAX_ANA_CHANS][CLDFB_NO_CHANNELS_MAX]; float Foa_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX]; float Foa_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX]; +#ifdef IVAS_FLOAT_FIXED + Word32 Foa_RealBuffer_fx[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX]; // Q6 + Word32 Foa_ImagBuffer_fx[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX]; 
// Q6 +#endif float intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS]; float direction_vector[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS]; float diffuseness_vector[MASA_FREQUENCY_BANDS]; @@ -996,8 +1006,22 @@ static void ivas_omasa_param_est_enc( computeDirectionVectors( intensity_real[0], intensity_real[1], intensity_real[2], 0, num_freq_bands, direction_vector[0], direction_vector[1], direction_vector[2] ); /* Power estimation for diffuseness */ +#ifndef IVAS_FLOAT_FIXED computeReferencePower_omasa( hOMasa->band_grouping, Foa_RealBuffer, Foa_ImagBuffer, reference_power[ts], 0, num_freq_bands ); - +#else +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + // Q for Foa_RealBuffer_fx and Foa_ImagBuffer is assumed to be 6 + for ( int r = 0; r < FOA_CHANNELS; r++ ) + { + floatToFixed_arrL( Foa_RealBuffer[r], Foa_RealBuffer_fx[r], 6, CLDFB_NO_CHANNELS_MAX ); + floatToFixed_arrL( Foa_ImagBuffer[r], Foa_ImagBuffer_fx[r], 6, CLDFB_NO_CHANNELS_MAX ); + } +#endif + computeReferencePower_omasa_ivas_fx( hOMasa->band_grouping, Foa_RealBuffer_fx, Foa_ImagBuffer_fx, reference_power_fx[ts], 0, num_freq_bands, &ref_exp ); +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + fixedToFloat_arrL( reference_power_fx[ts], reference_power[ts], 31 - ref_exp, num_freq_bands ); +#endif +#endif /* Fill buffers of length "averaging_length" time slots for intensity and energy */ hOMasa->index_buffer_intensity = ( hOMasa->index_buffer_intensity % DIRAC_NO_COL_AVG_DIFF ) + 1; /* averaging_length = 32 */ index = hOMasa->index_buffer_intensity; @@ -1435,6 +1459,7 @@ static void computeIntensityVector_enc( } +#ifndef IVAS_FLOAT_FIXED static void computeReferencePower_omasa( const int16_t *band_grouping, /* i : Band grouping for estimation */ float Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal */ @@ -1467,3 +1492,64 @@ static void computeReferencePower_omasa( return; } +#else +static void computeReferencePower_omasa_ivas_fx( + const Word16 *band_grouping, /* i : Band grouping for 
estimation */ + Word32 Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal Q6*/ + Word32 Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Imag part of input signal Q6*/ + Word32 *reference_power, /* o : Estimated power Q(31-ref_exp)*/ + const Word16 enc_param_start_band, /* i : first band to process */ + const Word16 num_freq_bands, /* i : Number of frequency bands */ + Word16 *ref_exp ) +{ + Word16 brange[2]; + Word16 ch_idx, i, j; + Word64 reference_power_tmp[CLDFB_NO_CHANNELS_MAX]; + Word16 ref_Q = 63; + move16(); + + FOR( i = 0; i < num_freq_bands; i++ ) + { + brange[0] = band_grouping[i + enc_param_start_band]; + move16(); + brange[1] = band_grouping[i + enc_param_start_band + 1]; + move16(); + reference_power[i] = 0; + move32(); + reference_power_tmp[i] = 0; + move64(); + + FOR( ch_idx = 0; ch_idx < FOA_CHANNELS; ch_idx++ ) + { + /* abs()^2 */ + FOR( j = brange[0]; j < brange[1]; j++ ) + { + // reference_power[i] += ( Cldfb_RealBuffer[ch_idx][j] * Cldfb_RealBuffer[ch_idx][j] ) + ( Cldfb_ImagBuffer[ch_idx][j] * Cldfb_ImagBuffer[ch_idx][j] ); + // Q13 (Q6+Q6+1) + reference_power_tmp[i] = W_add( reference_power_tmp[i], W_mac_32_32( W_mult_32_32( Cldfb_RealBuffer[ch_idx][j], Cldfb_RealBuffer[ch_idx][j] ), Cldfb_ImagBuffer[ch_idx][j], Cldfb_ImagBuffer[ch_idx][j] ) ); + move64(); + } + } + } + // v_multc( reference_power, 0.5f, reference_power, num_freq_bands ); + FOR( i = 0; i < num_freq_bands; i++ ) + { + reference_power_tmp[i] = W_shr( reference_power_tmp[i], 1 ); + move64(); + ref_Q = s_min( ref_Q, W_norm( reference_power_tmp[i] ) ); + } + + FOR( i = 0; i < num_freq_bands; i++ ) + { + reference_power_tmp[i] = W_shl( reference_power_tmp[i], ref_Q ); // Q13 + ref_Q + move64(); + reference_power[i] = W_extract_h( reference_power_tmp[i] ); // Q13 + ref_Q -32 + move32(); + } + + // ref_exp = 31- ((13+ref_Q) -32) + *ref_exp = sub( 31, ( sub( add( ref_Q, 13 ), 32 ) ) ); + move16(); + return; +} +#endif diff --git 
a/lib_enc/ivas_qmetadata_enc.c b/lib_enc/ivas_qmetadata_enc.c index e0a8fdeb1..649e2c733 100644 --- a/lib_enc/ivas_qmetadata_enc.c +++ b/lib_enc/ivas_qmetadata_enc.c @@ -64,13 +64,21 @@ static int16_t ivas_qmetadata_entropy_encode_dir( BSTR_ENC_HANDLE hMetaData, IVA static int16_t ivas_qmetadata_raw_encode_dir( BSTR_ENC_HANDLE hMetaData, IVAS_QDIRECTION *q_direction, const int16_t nbands, const int16_t start_band ); -int16_t ivas_qmetadata_encode_extended_gr_length( const uint16_t value, const uint16_t alphabet_size, const int16_t gr_param ); - static int16_t ivas_qmetadata_get_optimal_gr_param( uint16_t *unsigned_data, const int16_t count, const int16_t gr_param_count, int16_t *opt_gr_size ); +#ifdef IVAS_FLOAT_FIXED +Word16 ivas_qmetadata_encode_extended_gr_length( const UWord16 value, const UWord16 alphabet_size, const Word16 gr_param ); + +static Word16 ivas_qmetadata_encode_quasi_uniform_length( const UWord16 value, const UWord16 alphabet_size ); + +static void ivas_qmetadata_encode_quasi_uniform( BSTR_ENC_HANDLE hMetaData, const UWord16 value, const UWord16 alphabet_size ); +#else +int16_t ivas_qmetadata_encode_extended_gr_length( const uint16_t value, const uint16_t alphabet_size, const int16_t gr_param ); + static int16_t ivas_qmetadata_encode_quasi_uniform_length( const uint16_t value, const uint16_t alphabet_size ); static void ivas_qmetadata_encode_quasi_uniform( BSTR_ENC_HANDLE hMetaData, const uint16_t value, const uint16_t alphabet_size ); +#endif static int16_t ivas_qmetadata_reorder_elevation_index( const int16_t elevation_index, const int16_t avg_elevation_index, const int16_t elevation_alphabet ); @@ -115,12 +123,17 @@ static int16_t write_2dir_info( BSTR_ENC_HANDLE hMetaData, uint8_t *twoDirBands, static void transform_azimuth_dir2( IVAS_QMETADATA_HANDLE hQMetaData, int16_t *dir2_bands ); static int16_t calc_var_azi( const IVAS_QDIRECTION *q_direction, const int16_t diffuseness_index_max_ec_frame, const float avg_azimuth, float *avg_azimuth_out 
); - +#ifndef IVAS_FLOAT_FIXED static void ivas_qmetadata_quantize_diffuseness_nrg_ratios_hr_512( IVAS_QMETADATA_HANDLE hQMetaData, int16_t *needed_bits, const int16_t bits_dir_hr, BSTR_ENC_HANDLE hMetaData ); -static int16_t encode_surround_coherence_hr( IVAS_QMETADATA *hQMetaData, BSTR_ENC_HANDLE hMetaData ); - static int16_t ivas_qmetadata_quantize_coherence_hr_512( IVAS_QMETADATA *hQMetaData, const int16_t idx_d, const int16_t all_coherence_zero, BSTR_ENC_HANDLE hMetaData, const int16_t bits_coh ); +#else +static Word16 ivas_qmetadata_quantize_coherence_hr_512_fx( IVAS_QMETADATA *hQMetaData, const Word16 idx_d, const Word16 all_coherence_zero, BSTR_ENC_HANDLE hMetaData, const Word16 bits_coh ); + +static void ivas_qmetadata_quantize_diffuseness_nrg_ratios_hr_512_fx( IVAS_QMETADATA_HANDLE hQMetaData, Word16 *needed_bits, const Word16 bits_dir_hr, BSTR_ENC_HANDLE hMetaData ); +#endif + +static int16_t encode_surround_coherence_hr( IVAS_QMETADATA *hQMetaData, BSTR_ENC_HANDLE hMetaData ); static int16_t write_stream_dct_coeffs_omasa( int16_t *q_idx, const int16_t len_stream, BSTR_ENC_HANDLE hMetaData, const int16_t first_line, const int16_t low_bitrate_mode ); @@ -998,9 +1011,55 @@ ivas_error ivas_qmetadata_enc_encode_hr_384_512( hQMetaData->q_direction[1].cfg.nbands = hQMetaData->numTwoDirBands; } +#ifdef IVAS_FLOAT_FIXED +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + for ( j = hQMetaData->q_direction[0].cfg.start_band; j < hQMetaData->q_direction[0].cfg.nbands; ++j ) + { + for ( int k = 0; k < hQMetaData->q_direction[0].cfg.nblocks; k++ ) + { + hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k] = floatToFixed( hQMetaData->q_direction[0].band_data[j].energy_ratio[k], Q30 ); + } + } + if ( EQ_16( hQMetaData->no_directions, 2 ) ) + { + for ( j = hQMetaData->q_direction[1].cfg.start_band; j < hQMetaData->q_direction[1].cfg.nbands; ++j ) + { + for ( int k = 0; k < hQMetaData->q_direction[1].cfg.nblocks; k++ ) + { + 
hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = floatToFixed( hQMetaData->q_direction[1].band_data[j].energy_ratio[k], Q30 ); + } + } + } +#endif // IVAS_FLOAT_FIXED_CONVERSIONS + + /*Quantization and encoding of the Diffuseness */ + ivas_qmetadata_quantize_diffuseness_nrg_ratios_hr_512_fx( hQMetaData, bits_diff, bits_sph_idx, hMetaData ); + +#ifdef IVAS_FLOAT_FIXED_CONVERSIONS + for ( j = hQMetaData->q_direction[0].cfg.start_band; j < hQMetaData->q_direction[0].cfg.nbands; ++j ) + { + for ( int k = 0; k < hQMetaData->q_direction[0].cfg.nblocks; k++ ) + { + hQMetaData->q_direction[0].band_data[j].energy_ratio[k] = fixedToFloat( hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k], Q30 ); + } + } + if ( EQ_16( hQMetaData->no_directions, 2 ) ) + { + for ( j = hQMetaData->q_direction[1].cfg.start_band; j < hQMetaData->q_direction[1].cfg.nbands; ++j ) + { + for ( int k = 0; k < hQMetaData->q_direction[1].cfg.nblocks; k++ ) + { + hQMetaData->q_direction[1].band_data[j].energy_ratio[k] = fixedToFloat( hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k], Q30 ); + } + } + } +#endif // IVAS_FLOAT_FIXED_CONVERSIONS +#else /*Quantization and encoding of the Diffuseness */ ivas_qmetadata_quantize_diffuseness_nrg_ratios_hr_512( hQMetaData, bits_diff, bits_sph_idx, hMetaData ); +#endif // IVAS_FLOAT_FIXED + /* Encode surround coherence */ if ( all_coherence_zero == 0 ) @@ -1029,13 +1088,19 @@ ivas_error ivas_qmetadata_enc_encode_hr_384_512( q_direction->not_in_2D = 0; - +#ifdef IVAS_FLOAT_FIXED + /*Coherence */ + IF( all_coherence_zero == 0 ) + { + ivas_qmetadata_quantize_coherence_hr_512_fx( hQMetaData, d, all_coherence_zero, hMetaData, bits_sp_coh ); + } +#else /*Coherence */ if ( all_coherence_zero == 0 ) { ivas_qmetadata_quantize_coherence_hr_512( hQMetaData, d, all_coherence_zero, hMetaData, bits_sp_coh ); } - +#endif /* write the spherical indexes */ bits_ec = hMetaData->nb_bits_tot; if ( bits_sph_idx == 11 ) @@ -1728,117 +1793,129 @@ int16_t 
quantize_direction2D( } #ifdef IVAS_FLOAT_FIXED -static void ivas_qmetadata_quantize_diffuseness_nrg_ratios_hr_512( +static void ivas_qmetadata_quantize_diffuseness_nrg_ratios_hr_512_fx( IVAS_QMETADATA_HANDLE hQMetaData, - int16_t *needed_bits, - const int16_t bits_dir_hr, + Word16 *needed_bits, + const Word16 bits_dir_hr, BSTR_ENC_HANDLE hMetaData ) { - int16_t j, k; - int16_t index; + Word16 j, k; + Word16 index; needed_bits[0] = 0; needed_bits[1] = 0; + move16(); + move16(); - for ( j = hQMetaData->q_direction[0].cfg.start_band; j < hQMetaData->q_direction[0].cfg.nbands; ++j ) + FOR( j = hQMetaData->q_direction[0].cfg.start_band; j < hQMetaData->q_direction[0].cfg.nbands; ++j ) { - for ( k = 0; k < hQMetaData->q_direction[0].cfg.nblocks; k++ ) + FOR( k = 0; k < hQMetaData->q_direction[0].cfg.nblocks; k++ ) { -#ifdef IVAS_FLOAT_FIXED - /*=====================================flt-2-fix============================================*/ - hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k] = floatToFixed( hQMetaData->q_direction[0].band_data[j].energy_ratio[k], Q30 ); - /*=====================================flt-2-fix============================================*/ - index = masa_sq_fx( L_sub( ONE_IN_Q30, hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k] ), diffuseness_thresholds_hr_fx, HR_MASA_ER_LEVELS ); -#else - index = masa_sq( 1.0f - hQMetaData->q_direction[0].band_data[j].energy_ratio[k], diffuseness_thresholds_hr, HR_MASA_ER_LEVELS ); -#endif + push_next_indice( hMetaData, index, MASA_BITS_ER_HR ); hQMetaData->q_direction[0].band_data[j].energy_ratio_index[k] = index; hQMetaData->q_direction[0].band_data[j].energy_ratio_index_mod[k] = index; - hQMetaData->q_direction[0].band_data[j].energy_ratio[k] = 1.0f - diffuseness_reconstructions_hr[index]; - needed_bits[0] += MASA_BITS_ER_HR; + hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k] = W_extract_h( W_sub( ONE_IN_Q62, diffuseness_reconstructions_hr_fx[index] ) ); // Q30 + needed_bits[0] = 
add( needed_bits[0], MASA_BITS_ER_HR ); hQMetaData->q_direction[0].band_data[j].bits_sph_idx[k] = bits_dir_hr; + move16(); + move16(); + move32(); + move16(); + move16(); } } - if ( hQMetaData->no_directions == 2 ) + IF( EQ_16( hQMetaData->no_directions, 2 ) ) { - float ratioSum; - if ( bits_dir_hr == 16 ) + Word32 ratioSum; + Word16 div_e; + IF( EQ_16( bits_dir_hr, 16 ) ) { - for ( j = hQMetaData->q_direction[1].cfg.start_band; j < hQMetaData->q_direction[1].cfg.nbands; j++ ) + FOR( j = hQMetaData->q_direction[1].cfg.start_band; j < hQMetaData->q_direction[1].cfg.nbands; j++ ) { - for ( k = 0; k < hQMetaData->q_direction[1].cfg.nblocks; k++ ) + FOR( k = 0; k < hQMetaData->q_direction[1].cfg.nblocks; k++ ) { -#ifdef IVAS_FLOAT_FIXED - /*=====================================flt-2-fix============================================*/ - hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = floatToFixed( hQMetaData->q_direction[1].band_data[j].energy_ratio[k], Q30 ); - /*=====================================flt-2-fix============================================*/ - index = masa_sq_fx( L_sub( ONE_IN_Q30, hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] ), diffuseness_thresholds_hr_fx, HR_MASA_ER_LEVELS ); -#else - index = masa_sq( 1.0f - hQMetaData->q_direction[1].band_data[j].energy_ratio[k], diffuseness_thresholds_hr, HR_MASA_ER_LEVELS ); -#endif + push_next_indice( hMetaData, index, MASA_BITS_ER_HR ); hQMetaData->q_direction[1].band_data[j].energy_ratio_index[k] = index; - hQMetaData->q_direction[1].band_data[j].energy_ratio[k] = 1.0f - diffuseness_reconstructions_hr[index]; + move16(); + hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = W_extract_h( W_sub( ONE_IN_Q62, diffuseness_reconstructions_hr_fx[index] ) ); + move32(); - ratioSum = hQMetaData->q_direction[0].band_data[j].energy_ratio[k] + hQMetaData->q_direction[1].band_data[j].energy_ratio[k]; - if ( ratioSum > 1.0f ) + ratioSum = L_add( 
hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k], hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] ); + IF( GT_32( ratioSum, ONE_IN_Q30 /*1.0f*/ ) ) { - hQMetaData->q_direction[0].band_data[j].energy_ratio[k] /= ratioSum; - hQMetaData->q_direction[1].band_data[j].energy_ratio[k] /= ratioSum; + hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k] = BASOP_Util_Divide3232_Scale_cadence( hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k], ratioSum, &div_e ); + hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k] = L_shl( hQMetaData->q_direction[0].band_data[j].energy_ratio_fx[k], sub( div_e, 1 ) ); + move32(); + move32(); + + hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = BASOP_Util_Divide3232_Scale_cadence( hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k], ratioSum, &div_e ); + hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = L_shl( hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k], sub( div_e, 1 ) ); + move32(); + move32(); } - needed_bits[1] += MASA_BITS_ER_HR; + needed_bits[1] = add( needed_bits[1], MASA_BITS_ER_HR ); + move16(); hQMetaData->q_direction[1].band_data[j].bits_sph_idx[k] = bits_dir_hr; + move16(); } } } - else + ELSE { - int16_t pos_2dir_band[MASA_MAXIMUM_CODING_SUBBANDS]; + Word16 pos_2dir_band[MASA_MAXIMUM_CODING_SUBBANDS]; k = 0; - for ( j = hQMetaData->q_direction[0].cfg.start_band; j < hQMetaData->q_direction[0].cfg.nbands; j++ ) + move16(); + FOR( j = hQMetaData->q_direction[0].cfg.start_band; j < hQMetaData->q_direction[0].cfg.nbands; j++ ) { - if ( hQMetaData->twoDirBands[j] == 1 ) + IF( EQ_16( hQMetaData->twoDirBands[j], 1 ) ) { pos_2dir_band[k] = j; - k++; + move16(); + k = add( k, 1 ); } - else + ELSE { pos_2dir_band[k] = 0; + move16(); } } - for ( j = hQMetaData->q_direction[1].cfg.start_band; j < hQMetaData->q_direction[1].cfg.nbands; j++ ) + FOR( j = hQMetaData->q_direction[1].cfg.start_band; j < hQMetaData->q_direction[1].cfg.nbands; j++ ) { - for ( k = 
0; k < hQMetaData->q_direction[1].cfg.nblocks; k++ ) + FOR( k = 0; k < hQMetaData->q_direction[1].cfg.nblocks; k++ ) { -#ifdef IVAS_FLOAT_FIXED - /*=====================================flt-2-fix============================================*/ - hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = floatToFixed( hQMetaData->q_direction[1].band_data[j].energy_ratio[k], Q30 ); - /*=====================================flt-2-fix============================================*/ - index = masa_sq_fx( L_sub( ONE_IN_Q30, hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] ), diffuseness_thresholds_hr_fx, HR_MASA_ER_LEVELS ); -#else - index = masa_sq( 1.0f - hQMetaData->q_direction[1].band_data[j].energy_ratio[k], diffuseness_thresholds_hr, HR_MASA_ER_LEVELS ); -#endif + push_next_indice( hMetaData, index, MASA_BITS_ER_HR ); hQMetaData->q_direction[1].band_data[j].energy_ratio_index[k] = index; - hQMetaData->q_direction[1].band_data[j].energy_ratio[k] = 1.0f - diffuseness_reconstructions_hr[index]; + move16(); + hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = W_extract_h( W_sub( ONE_IN_Q62, diffuseness_reconstructions_hr_fx[index] ) ); + move32(); - ratioSum = hQMetaData->q_direction[0].band_data[pos_2dir_band[j]].energy_ratio[k] + hQMetaData->q_direction[1].band_data[j].energy_ratio[k]; + ratioSum = L_add( hQMetaData->q_direction[0].band_data[pos_2dir_band[j]].energy_ratio_fx[k], hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] ); - if ( ratioSum > 1.0f ) + IF( GT_32( ratioSum, ONE_IN_Q30 /*1.0f*/ ) ) { - hQMetaData->q_direction[0].band_data[pos_2dir_band[j]].energy_ratio[k] /= ratioSum; - hQMetaData->q_direction[1].band_data[j].energy_ratio[k] /= ratioSum; + hQMetaData->q_direction[0].band_data[pos_2dir_band[j]].energy_ratio_fx[k] = BASOP_Util_Divide3232_Scale_cadence( hQMetaData->q_direction[0].band_data[pos_2dir_band[j]].energy_ratio_fx[k], ratioSum, &div_e ); + hQMetaData->q_direction[0].band_data[pos_2dir_band[j]].energy_ratio_fx[k] = 
L_shl( hQMetaData->q_direction[0].band_data[pos_2dir_band[j]].energy_ratio_fx[k], sub( div_e, 1 ) ); + move32(); + move32(); + + hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = BASOP_Util_Divide3232_Scale_cadence( hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k], ratioSum, &div_e ); + hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k] = L_shl( hQMetaData->q_direction[1].band_data[j].energy_ratio_fx[k], sub( div_e, 1 ) ); + move32(); + move32(); } - needed_bits[1] += MASA_BITS_ER_HR; + needed_bits[1] = add( needed_bits[1], MASA_BITS_ER_HR ); + move16(); hQMetaData->q_direction[1].band_data[j].bits_sph_idx[k] = bits_dir_hr; + move16(); } } } @@ -2648,7 +2725,7 @@ void restore_metadata_buffer( * * encode value using a quasi-uniform code of b or b + 1 bits, where b = floor(log2(alphabet_size)) *------------------------------------------------------------------------*/ - +#ifndef IVAS_FLOAT_FIXED static void ivas_qmetadata_encode_quasi_uniform( BSTR_ENC_HANDLE hMetaData, const uint16_t value, @@ -2671,6 +2748,30 @@ static void ivas_qmetadata_encode_quasi_uniform( return; } +#else +static void ivas_qmetadata_encode_quasi_uniform( + BSTR_ENC_HANDLE hMetaData, + const UWord16 value, + const UWord16 alphabet_size ) +{ + Word16 bits; + UWord16 tresh; + + bits = sub( 30, norm_l( alphabet_size ) ); /* bits = floor(log2(alphabet_size)) */ + tresh = (UWord16) L_sub( L_shl( 1U, add( bits, 1 ) ), alphabet_size ); + + IF( LT_32( value, tresh ) ) + { + push_next_indice( hMetaData, value, bits ); + } + ELSE /* value >= tresh */ + { + push_next_indice( hMetaData, (UWord16) L_add( value, tresh ), add( bits, 1 ) ); + } + + return; +} +#endif /*-----------------------------------------------------------------------* @@ -2681,7 +2782,59 @@ static void ivas_qmetadata_encode_quasi_uniform( * * *------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +/*! 
r: number of bits using Golomb Rice code */ +static Word16 GR_bits_new( + UWord16 *data, /* i : data to encode with GR */ + Word16 *no_symb, /* i : number of symbols for each component*/ + const Word16 no_data, /* i : number of input data */ + const Word16 GR_order, /* i : GR order to be used */ + const Word16 check_two_orders, /* i : check also coding with GR_order-1 */ + Word16 *real_GR_ord /* o : the GR order that has been used */ +) +{ + Word16 nbits = 0, i; + Word16 nbits1 = 0; + Word16 nb; + move16(); + move16(); + FOR( i = 0; i < no_data; i++ ) + { + nb = ivas_qmetadata_encode_extended_gr_length( data[i], no_symb[i], GR_order ); + nbits = add( nbits, nb ); + } + + IF( EQ_16( check_two_orders, 1 ) ) + { + FOR( i = 0; i < no_data; i++ ) + { + nb = ivas_qmetadata_encode_extended_gr_length( data[i], no_symb[i], sub( GR_order, 1 ) ); + nbits1 = add( nbits1, nb ); + } + + IF( LT_16( nbits1, nbits ) ) + { + nbits = add( nbits1, 1 ); + *real_GR_ord = sub( GR_order, 1 ); + move16(); + } + ELSE + { + nbits = add( nbits, 1 ); + *real_GR_ord = GR_order; + move16(); + } + } + ELSE + { + *real_GR_ord = GR_order; + move16(); + } + + return nbits; +} +#else /*! r: number of bits using Golomb Rice code */ static int16_t GR_bits_new( uint16_t *data, /* i : data to encode with GR */ @@ -2728,6 +2881,7 @@ static int16_t GR_bits_new( return nbits; } +#endif /*------------------------------------------------------------------------- @@ -2833,7 +2987,60 @@ static int16_t GR_bits_azimuth_context( * * Golomb Rice encoding with mean removing *------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +/*! 
r: number of bits used */ +static Word16 mean_removed_GR_new( + const UWord16 *idx, /* i : data to encode */ + const Word16 max_no_symb, + const Word16 len, /* i : number of data */ + const Word16 adapt_GR, /* i : flag for telling to use or not two GR order values */ + Word16 *GR_ord, /* i/o: GR order */ + UWord16 *p_av, /* o : average index */ + UWord16 *mr_idx /* o : mean removed indexes */ +) +{ + Word16 av, i, nbits; + Word16 sh_idx[MASA_MAXIMUM_CODING_SUBBANDS]; + Word16 max_ns[MASA_MAXIMUM_CODING_SUBBANDS]; + + /* av = (Word16) ( 0.5f + sum_s( (const Word16 *) idx, len ) / (float) len ); */ + av = div_l( L_add( L_shl( sum16_fx( (const Word16 *) idx, len ), 1 ), len ), len ); /* div_l() yields ( L_num >> 1 ) / den, so the round-half-up offset is len, not 1 */ + + *p_av = av; + move16(); + FOR( i = 0; i < len; i++ ) + { + max_ns[i] = shl( max_no_symb, 1 ); + sh_idx[i] = sub( idx[i], av ); + move16(); + move16(); + } + + FOR( i = 0; i < len; i++ ) + { + IF( sh_idx[i] < 0 ) + { + sh_idx[i] = -2 * sh_idx[i]; + } + ELSE IF( sh_idx[i] > 0 ) + { + sh_idx[i] = sub( shl( sh_idx[i], 1 ), 1 ); + move16(); + } + ELSE + { + sh_idx[i] = 0; + move16(); + } + mr_idx[i] = (UWord16) sh_idx[i]; + move16(); + } + + nbits = GR_bits_new( mr_idx, max_ns, len, *GR_ord, adapt_GR, GR_ord ); + + return nbits; +} +#else /*! 
r: number of bits used */ static int16_t mean_removed_GR_new( const uint16_t *idx, /* i : data to encode */ @@ -2878,13 +3085,31 @@ static int16_t mean_removed_GR_new( return nbits; } - +#endif /*------------------------------------------------------------------------- * ivas_qmetadata_encode_quasi_uniform_length() * *------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +static Word16 ivas_qmetadata_encode_quasi_uniform_length( + const UWord16 value, + const UWord16 alphabet_size ) +{ + Word16 bits; + UWord16 tresh; + + bits = sub( 30, norm_l( alphabet_size ) ); /* bits = floor(log2(alphabet_size)) */ + tresh = (UWord16) L_sub( L_shl( 1U, add( bits, 1 ) ), alphabet_size ); + + IF( GE_32( value, tresh ) ) + { + bits = add( bits, 1 ); + } + return bits; +} +#else static int16_t ivas_qmetadata_encode_quasi_uniform_length( const uint16_t value, const uint16_t alphabet_size ) @@ -2902,7 +3127,7 @@ static int16_t ivas_qmetadata_encode_quasi_uniform_length( return bits; } - +#endif /*------------------------------------------------------------------------- * ivas_qmetadata_entropy_encode_dir() @@ -4111,7 +4336,45 @@ static int16_t ivas_qmetadata_get_optimal_gr_param( * * *------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +Word16 +ivas_qmetadata_encode_extended_gr_length( + const UWord16 value, + const UWord16 alphabet_size, + const Word16 gr_param ) +{ + UWord16 msb_alphabet_size; + Word16 bits; + UWord16 msb, lsb; + + msb_alphabet_size = (UWord16) L_shr( L_add( alphabet_size, L_sub( L_shl( 1U, gr_param ), 1 ) ), gr_param ); + + IF( LE_32( msb_alphabet_size, 3 ) ) + { + /* EncodeQuasiUniform is always equal or better than Limited GR with up to 3 msb values */ + bits = ivas_qmetadata_encode_quasi_uniform_length( value, alphabet_size ); + } + ELSE + { + msb = (UWord16) L_shr( value, gr_param ); + + bits = msb; /* leading one bits */ + move16(); + IF( LT_32( msb, L_sub( 
msb_alphabet_size, 1 ) ) ) + { + bits = add( bits, add( 1, gr_param ) ); /* terminating zero bit, if not the largest msb (Limited GR), and the lsb bits */ + } + ELSE + { + lsb = (UWord16) L_and( value, L_sub( L_shl( 1U, gr_param ), 1 ) ); + bits = add( bits, ivas_qmetadata_encode_quasi_uniform_length( lsb, (UWord16) L_sub( alphabet_size, L_shl( L_sub( msb_alphabet_size, 1 ), gr_param ) ) ) ); + } + } + + return bits; +} +#else int16_t ivas_qmetadata_encode_extended_gr_length( const uint16_t value, @@ -4148,7 +4411,7 @@ ivas_qmetadata_encode_extended_gr_length( return bits; } - +#endif /*------------------------------------------------------------------------- * ivas_qmetadata_reorder_elevation_index() @@ -4234,7 +4497,52 @@ static int16_t ivas_qmetadata_reorder_azimuth_index( * * *------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +void ivas_qmetadata_encode_extended_gr( + BSTR_ENC_HANDLE hMetaData, + const UWord16 value, + const UWord16 alphabet_size, + const Word16 gr_param ) +{ + UWord16 msb_alphabet_size; + UWord16 msb, lsb, cnt; + + + msb_alphabet_size = (UWord16) L_shr( L_add( alphabet_size, L_sub( L_shl( 1U, gr_param ), 1 ) ), gr_param ); + + IF( LE_32( msb_alphabet_size, 3 ) ) + { + /* EncodeQuasiUniform is always equal or better than Limited GR with up to 3 msb values */ + ivas_qmetadata_encode_quasi_uniform( hMetaData, value, alphabet_size ); + } + ELSE + { + msb = (UWord16) L_shr( value, gr_param ); + lsb = (UWord16) L_and( value, L_sub( L_shl( 1U, gr_param ), 1 ) ); + + FOR( cnt = 0; cnt < msb; cnt++ ) + { + /* leading one bits */ + push_next_indice( hMetaData, 1, 1 ); + } + IF( LT_32( msb, L_sub( msb_alphabet_size, 1 ) ) ) + { + push_next_indice( hMetaData, 0, 1 ); /* terminating zero bit, if not the largest msb (Limited GR) */ + IF( gr_param > 0 ) + { + push_next_indice( hMetaData, lsb, gr_param ); + } + } + ELSE + { + ivas_qmetadata_encode_quasi_uniform( hMetaData, lsb, (UWord16) L_sub( alphabet_size, 
L_shl( L_sub( msb_alphabet_size, 1 ), gr_param ) ) ); + } + } + + return; +} +#else void ivas_qmetadata_encode_extended_gr( BSTR_ENC_HANDLE hMetaData, const uint16_t value, @@ -4279,7 +4587,7 @@ void ivas_qmetadata_encode_extended_gr( return; } - +#endif /*-----------------------------------------------------------------------* * Local functions (EC3, requantize directions) @@ -6682,7 +6990,132 @@ static void dct4_transform( * * *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +static Word16 ivas_qmetadata_quantize_coherence_hr_512_fx( + IVAS_QMETADATA *hQMetaData, /* i/o: quantized metadata */ + const Word16 idx_d, /* i : current direction index */ + const Word16 all_coherence_zero, /* i : all coherence is zero - flag */ + BSTR_ENC_HANDLE hMetaData, /* i : metadata handle */ + const Word16 bits_coh ) +{ + Word16 j, k; + Word16 nbands, nblocks; + Word16 nbits; + Word16 nbits1, nbits0, nbits_av; + UWord16 idx_coh[MASA_MAXIMUM_CODING_SUBBANDS]; + IVAS_QDIRECTION *q_direction; + Word16 cbsize; + Word16 delta, tmp; + Word16 min_idx, GR_param, GR_param_av; + UWord16 av, mr_idx[MASA_MAXIMUM_CODING_SUBBANDS]; + + q_direction = &( hQMetaData->q_direction[idx_d] ); + nbands = q_direction->cfg.nbands; + nblocks = q_direction->cfg.nblocks; + nbits = 0; + move16(); + move16(); + move16(); + + IF( EQ_16( all_coherence_zero, 1 ) ) + { + return nbits; + } + nbits = hMetaData->nb_bits_tot; + move16(); + cbsize = shl( 1, bits_coh ); + // delta = 256.0f / cbsize; + delta = div_l( 256, shr( cbsize, 1 ) ); + + FOR( k = 0; k < nblocks; k++ ) + { + min_idx = 0; + move16(); + FOR( j = 0; j < nbands; j++ ) + { + idx_coh[j] = usquant_fx( (Word16) ( q_direction->coherence_band_data[j].spread_coherence[k] ), &tmp, shr( delta, 1 ), shr( delta, 1 ) /* Q-1 */, cbsize ); + move16(); + q_direction->coherence_band_data[j].spread_coherence[k] = (UWord8) add( imult1616( idx_coh[j], delta ), shr( delta, 1 ) ); + if ( LT_16( idx_coh[j], min_idx ) ) + { + 
min_idx = idx_coh[j]; + move16(); + } + } + + nbits0 = 0; + nbits1 = 0; + move16(); + move16(); + + FOR( j = 0; j < nbands; j++ ) + { + idx_coh[j] = sub( idx_coh[j], min_idx ); + move16(); + nbits0 = add( nbits0, ivas_qmetadata_encode_extended_gr_length( idx_coh[j], sub( cbsize, min_idx ), 0 ) ); + nbits1 = add( nbits1, ivas_qmetadata_encode_extended_gr_length( idx_coh[j], sub( cbsize, min_idx ), 1 ) ); + } + IF( nbits0 < nbits1 ) + { + GR_param = 0; + nbits1 = nbits0; + move16(); + move16(); + } + ELSE + { + GR_param = 1; + move16(); + } + + GR_param_av = 1; + move16(); + nbits_av = mean_removed_GR_new( idx_coh, cbsize, nbands, 1, &GR_param_av, &av, mr_idx ); + + IF( LT_16( nbits_av, nbits1 ) ) + { + nbits1 = nbits_av; + GR_param = GR_param_av; + move16(); + move16(); + + /* use average removed */ + push_next_indice( hMetaData, 1, 1 ); + + /* write average */ + push_next_indice( hMetaData, av, bits_coh ); + + /* write GR param */ + push_next_indice( hMetaData, GR_param, 1 ); + + FOR( j = 0; j < nbands; j++ ) + { + ivas_qmetadata_encode_extended_gr( hMetaData, mr_idx[j], imult1616( 2, cbsize ), GR_param ); + } + } + ELSE + { + /* use min removed */ + push_next_indice( hMetaData, 0, 1 ); + + /* write min index */ + push_next_indice( hMetaData, min_idx, bits_coh ); + + /* write GR param */ + push_next_indice( hMetaData, GR_param, 1 ); + + FOR( j = 0; j < nbands; j++ ) + { + ivas_qmetadata_encode_extended_gr( hMetaData, idx_coh[j], sub( cbsize, min_idx ), GR_param ); + } + } + } + + nbits = sub( hMetaData->nb_bits_tot, nbits ); + return nbits; +} +#else static int16_t ivas_qmetadata_quantize_coherence_hr_512( IVAS_QMETADATA *hQMetaData, /* i/o: quantized metadata */ const int16_t idx_d, /* i : current direction index */ @@ -6789,7 +7222,7 @@ static int16_t ivas_qmetadata_quantize_coherence_hr_512( nbits = hMetaData->nb_bits_tot - nbits; return nbits; } - +#endif /*-------------------------------------------------------------------* * 
ivas_qmetadata_quantize_coherence() @@ -7273,7 +7706,51 @@ static void ivas_qmetadata_reorder_2dir_bands( * * *-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +static Word16 write_2dir_info( + BSTR_ENC_HANDLE hMetaData, + UWord8 *twoDirBands, + const Word16 n, + const Word16 k ) +{ + Word16 nbits; + Word16 p[MASA_MAXIMUM_CODING_SUBBANDS]; + UWord16 dif_p[MASA_MAXIMUM_CODING_SUBBANDS]; + Word16 i, j; + j = 0; + p[0] = 0; + move16(); + move16(); + FOR( i = 0; i < n; i++ ) + { + IF( EQ_16( twoDirBands[i], 1 ) ) + { + p[j] = i; + j = add( j, 1 ); + move16(); + } + } + + dif_p[0] = p[0]; + move16(); + FOR( i = 1; i < j; i++ ) + { + dif_p[i] = sub( sub( p[i], p[i - 1] ), 1 ); + move16(); + } + + j = hMetaData->nb_bits_tot; + move16(); + FOR( i = 0; i < k; i++ ) + { + ivas_qmetadata_encode_extended_gr( hMetaData, dif_p[i], 24, 0 ); + } + nbits = sub( hMetaData->nb_bits_tot, j ); + + return nbits; +} +#else static int16_t write_2dir_info( BSTR_ENC_HANDLE hMetaData, uint8_t *twoDirBands, @@ -7311,7 +7788,7 @@ static int16_t write_2dir_info( return nbits; } - +#endif /*-------------------------------------------------------------------* * transform_azimuth_dir2() diff --git a/lib_enc/ivas_stat_enc.h b/lib_enc/ivas_stat_enc.h index e62ae7792..944f72c91 100644 --- a/lib_enc/ivas_stat_enc.h +++ b/lib_enc/ivas_stat_enc.h @@ -167,7 +167,7 @@ typedef struct stereo_dft_enc_data_struct float DFT[CPE_CHANNELS][STEREO_DFT_N_MAX_ENC]; int16_t dft_ovl; /* Overlap size */ int16_t dft_zp; /* Zero padding */ - +#ifndef IVAS_FLOAT_FIXED const float *win; /* DFT window */ const float *win_8k; /* DFT window */ const float *win_12k8; /* DFT window */ @@ -187,8 +187,8 @@ typedef struct stereo_dft_enc_data_struct const float *dft_trigo_12k8; const float *dft_trigo_16k; const float *dft_trigo_32k; +#endif int16_t dft_trigo_step; - float output_mem_res_8k[STEREO_DFT_OVL_8k]; /*I/O channel buffers */ @@ -197,24 +197,28 @@ typedef struct 
stereo_dft_enc_data_struct float output_mem_dmx_16k[STEREO_DFT_OVL_16k]; /*can hold 16, 12.8 or 32kHz signals*/ float output_mem_dmx_32k[STEREO_DFT_OVL_32k]; /*can hold 16, 12.8 or 32kHz signals*/ float output_mem_dmx_16k_shb[STEREO_DFT_OVL_16k]; +#ifndef IVAS_FLOAT_FIXED float input_mem_itd[CPE_CHANNELS][STEREO_DFT_OVL_MAX]; - +#endif /*Bands*/ int16_t band_res[STEREO_DFT_ENC_DFT_NB]; int16_t band_limits[STEREO_DFT_BAND_MAX + 1]; int16_t nbands; int16_t band_limits_dmx[STEREO_DFT_BAND_MAX + 1]; int16_t nbands_dmx; - +#ifndef IVAS_FLOAT_FIXED /*Stereo parameters*/ float past_nrgL[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX]; float past_nrgR[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX]; float past_dot_prod_real[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX]; float past_dot_prod_imag[STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX]; +#endif int16_t nrg_past_pos; /*Side Gain*/ +#ifndef IVAS_FLOAT_FIXED float side_gain[STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX]; +#endif Word32 side_gain_fx[STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX]; // Q31 int16_t side_gain_flag_1; int16_t side_gain_flag_2; @@ -230,21 +234,27 @@ typedef struct stereo_dft_enc_data_struct #endif /* Stereo CNG */ +#ifndef IVAS_FLOAT_FIXED float sidSideGain[STEREO_DFT_ERB4_BANDS]; - Word32 sidSideGain_fx[STEREO_DFT_ERB4_BANDS]; // Q31? +#endif float win_ana_energy; - Word16 win_ana_energy_fx; // Q15 + Word32 sidSideGain_fx[STEREO_DFT_ERB4_BANDS]; // Q31? 
+ Word16 win_ana_energy_fx; // Q15 +#ifndef IVAS_FLOAT_FIXED float xspec_smooth[STEREO_DFT_N_32k_ENC]; + float sid_gipd; + float prev_sid_gipd; +#endif float Spd_L_smooth[STEREO_DFT_N_32k_ENC / 2]; float Spd_R_smooth[STEREO_DFT_N_32k_ENC / 2]; - float sid_gipd; int16_t coh_fade_counter; - float prev_sid_gipd; Word32 prev_sid_gipd_fx; // Q13 int16_t prev_sid_no_ipd_flag; /*IPD*/ +#ifndef IVAS_FLOAT_FIXED float gipd[STEREO_DFT_ENC_DFT_NB]; +#endif int16_t gipd_band_max; int16_t gipd_index; int16_t no_ipd_flag; /* flag to indicate when group IPD gets used */ @@ -272,12 +282,12 @@ typedef struct stereo_dft_enc_data_struct int16_t currentNumUpdates; int16_t expectedNumUpdates; /* Expected number of frames before use of ITD estimate */ int16_t resetFrames; - +#ifndef IVAS_FLOAT_FIXED /* energy buffers for ICBWE */ float nrg_L[2]; float nrg_R[2]; float nrg_DMX[2]; - +#endif /*Residual prediction*/ int16_t res_pred_mode[STEREO_DFT_ENC_DFT_NB]; /* mode from 0 (off) to 1 (on) */ float res_pred_gain[STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX]; /*prediction gain for the residual HFs */ @@ -651,10 +661,11 @@ typedef struct stereo_icbwe_enc_data_structure float icbwe_inp_mem[CPE_CHANNELS][NS2SA( 48000, L_MEM_RECALC_TBE_NS )]; float *dataChan[CPE_CHANNELS]; float memModifyFs_icbwe[CPE_CHANNELS][2 * L_FILT32k]; - +#ifndef IVAS_FLOAT_FIXED float mem_nrg_L[CPE_CHANNELS]; float mem_nrg_R[CPE_CHANNELS]; float mem_nrg_DMX[CPE_CHANNELS]; +#endif float gDes_pastFrame; float icbweRefEner; @@ -725,24 +736,24 @@ typedef struct ivas_stereo_classifier_data_structure /* features for xtalk classifier and UNCLR classifier */ Word16 clas_ch1; Word16 pitch_ch1[3]; - float voicing_ch1[3]; #ifndef IVAS_FLOAT_FIXED + float voicing_ch1[3]; float cor_map_sum_ch1; float lsf_ch1[M]; float lepsP_ch1; float dE1_ch1, dE1_ch2; float nchar_ch1, nchar_ch2; -#endif float non_sta_ch1; float sp_div_ch1; float ps_diff_ch1, ps_diff_ch2; - Word32 ps_diff_ch1_fx, ps_diff_ch2_fx; - Word16 ps_diff_ch1_e, 
ps_diff_ch2_e; float ps_sta_ch1, ps_sta_ch2; float prev_g_IPD; float prev_IPD; float prev_ratio_m1_m2; float ratio_L; +#endif + Word32 ps_diff_ch1_fx, ps_diff_ch2_fx; + Word16 ps_diff_ch1_e, ps_diff_ch2_e; Word16 vad_flag_glob; Word16 vad_relE; @@ -775,12 +786,12 @@ typedef struct ivas_stereo_classifier_data_structure float xtalk_wscore; float xtalk_score; float xtalk_score_wrelE; + float is_speech; #endif Word16 lrtd_mode; Word16 prev_lrtd_mode; - float is_speech; Word16 silence_flag; diff --git a/lib_enc/ivas_stereo_classifier.c b/lib_enc/ivas_stereo_classifier.c index e4000c67b..b754db8de 100644 --- a/lib_enc/ivas_stereo_classifier.c +++ b/lib_enc/ivas_stereo_classifier.c @@ -421,8 +421,8 @@ void stereo_classifier_init( /* initialization of features for xtalk classifier and UNCLR classifier */ hStereoClassif->clas_ch1 = 0; set_s( hStereoClassif->pitch_ch1, 0, 3 ); - set_f( hStereoClassif->voicing_ch1, 0.0f, 3 ); #ifndef IVAS_FLOAT_FIXED + set_f( hStereoClassif->voicing_ch1, 0.0f, 3 ); hStereoClassif->cor_map_sum_ch1 = 0.0f; set_f( hStereoClassif->lsf_ch1, 0.0f, M ); hStereoClassif->lepsP_ch1 = 0.0f; @@ -430,34 +430,26 @@ void stereo_classifier_init( hStereoClassif->dE1_ch2 = 0.0f; hStereoClassif->nchar_ch1 = 0.0f; hStereoClassif->nchar_ch2 = 0.0f; -#endif hStereoClassif->non_sta_ch1 = 0.0f; hStereoClassif->sp_div_ch1 = 0.0f; - -#ifdef IVAS_FLOAT_FIXED - hStereoClassif->ps_diff_ch1_fx = 0; - hStereoClassif->ps_diff_ch2_fx = 0; - hStereoClassif->ps_sta_ch1_fx = 0; - hStereoClassif->ps_sta_ch2_fx = 0; -#endif hStereoClassif->ps_diff_ch1 = 0.0f; hStereoClassif->ps_diff_ch2 = 0.0f; - hStereoClassif->ps_sta_ch1 = 0.0f; hStereoClassif->ps_sta_ch2 = 0.0f; hStereoClassif->prev_g_IPD = 0.5f; hStereoClassif->prev_IPD = 0.0f; hStereoClassif->prev_ratio_m1_m2 = 0.0f; +#endif #ifndef IVAS_FLOAT_FIXED set_f( hStereoClassif->xtalk_score_buf, 0.0f, XTALK_SCORE_BUF_LEN ); -#endif hStereoClassif->ratio_L = 0.5f; +#endif hStereoClassif->vad_flag_glob = 0; hStereoClassif->vad_relE = 
0; - hStereoClassif->is_speech = 0.0f; set_s( hStereoClassif->aEn_raw, 0, CPE_CHANNELS ); #ifndef IVAS_FLOAT_FIXED + hStereoClassif->is_speech = 0.0f; hStereoClassif->Etot_dn = 0.0f; hStereoClassif->Etot_up = 0.0f; set_f( hStereoClassif->relE_buf, 0.0f, UNCLR_L_RELE ); @@ -505,6 +497,8 @@ void stereo_classifier_init_fx( move16(); set16_fx( hStereoClassif->pitch_ch1, 0, 3 ); set_zero_fx( hStereoClassif->voicing_ch1_fx, 3 ); + hStereoClassif->voicing_ch1_e = 0; + move16(); hStereoClassif->cor_map_sum_ch1_fx = 0; move32(); set_zero_fx( hStereoClassif->lsf_ch1_fx, M ); @@ -512,32 +506,44 @@ void stereo_classifier_init_fx( move32(); hStereoClassif->dE1_ch1_fx = 0; move32(); - hStereoClassif->dE1_ch1_e = 31; + hStereoClassif->dE1_ch1_e = 0; move16(); hStereoClassif->dE1_ch2_fx = 0; move32(); - hStereoClassif->dE1_ch2_e = 31; + hStereoClassif->dE1_ch2_e = 0; move16(); hStereoClassif->nchar_ch1_fx = 0; move32(); - hStereoClassif->nchar_ch1_e = 31; + hStereoClassif->nchar_ch1_e = 0; move16(); hStereoClassif->nchar_ch2_fx = 0; move32(); - hStereoClassif->nchar_ch2_e = 31; + hStereoClassif->nchar_ch2_e = 0; move16(); hStereoClassif->non_sta_ch1_fx = 0; move32(); + hStereoClassif->non_sta_ch1_e = 0; + move16(); hStereoClassif->sp_div_ch1_fx = 0; move32(); + hStereoClassif->sp_div_ch1_e = 0; + move16(); hStereoClassif->ps_diff_ch1_fx = 0; move32(); + hStereoClassif->ps_diff_ch1_e = 0; + move16(); hStereoClassif->ps_diff_ch2_fx = 0; move32(); + hStereoClassif->ps_diff_ch2_e = 0; + move16(); hStereoClassif->ps_sta_ch1_fx = 0; move32(); + hStereoClassif->ps_sta_ch1_e = 0; + move16(); hStereoClassif->ps_sta_ch2_fx = 0; move32(); + hStereoClassif->ps_sta_ch2_e = 0; + move16(); hStereoClassif->prev_g_IPD_fx = ONE_IN_Q28; // 0.5f in Q29 move32(); hStereoClassif->prev_IPD_fx = 0; @@ -547,6 +553,8 @@ void stereo_classifier_init_fx( set_zero_fx( hStereoClassif->xtalk_score_buf_fx, XTALK_SCORE_BUF_LEN ); hStereoClassif->ratio_L_fx = 1073741824; // Q31 move32(); + hStereoClassif->ratio_L_e 
= 0; // Q31 + move16(); hStereoClassif->vad_flag_glob = 0; move16(); hStereoClassif->vad_relE = 0; diff --git a/lib_enc/ivas_stereo_dft_enc.c b/lib_enc/ivas_stereo_dft_enc.c index c596fcbb5..73ceaaa9f 100644 --- a/lib_enc/ivas_stereo_dft_enc.c +++ b/lib_enc/ivas_stereo_dft_enc.c @@ -669,10 +669,10 @@ static void stereo_dft_enc_open( #ifdef IVAS_FLOAT_FIXED hStereoDft->win_ana_energy_fx = (Word16) ( hStereoDft->win_ana_energy * ( 1 << 15 ) ); #endif - set_f( hStereoDft->output_mem_dmx_32k, 0, STEREO_DFT_OVL_32k ); hStereoDft->dft_zp = (int16_t) ( STEREO_DFT_ZP_MAX_ENC * input_Fs / 48000 ); - + set_f( hStereoDft->output_mem_dmx_32k, 0, STEREO_DFT_OVL_32k ); +#ifndef IVAS_FLOAT_FIXED hStereoDft->dft_trigo_8k = dft_trigo_32k; hStereoDft->dft_trigo_12k8 = dft_trigo_12k8; hStereoDft->dft_trigo_16k = dft_trigo_32k; @@ -687,7 +687,7 @@ static void stereo_dft_enc_open( hStereoDft->win_12k8 = win_syn_12k8; hStereoDft->win_16k = win_syn_16k; hStereoDft->win_32k = win_syn_32k; - +#endif // need to remove this once fixed function is called // #ifdef IVAS_FLOAT_FIXED hStereoDft->dft_trigo_8k_fx = dft_trigo_32k_fx; @@ -708,54 +708,60 @@ static void stereo_dft_enc_open( if ( input_Fs == 16000 ) { - hStereoDft->dft_trigo = dft_trigo_32k; hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP; - hStereoDft->win_ana = win_ana_16k; - hStereoDft->win = win_syn_16k; #ifdef IVAS_FLOAT_FIXED hStereoDft->dft_trigo_fx = dft_trigo_32k_fx; hStereoDft->win_ana_fx = win_ana_16k_fx; hStereoDft->win_fx = win_syn_16k_fx; +#else + hStereoDft->dft_trigo = dft_trigo_32k; + hStereoDft->win_ana = win_ana_16k; + hStereoDft->win = win_syn_16k; + #endif } else if ( input_Fs == 32000 ) { - hStereoDft->dft_trigo = dft_trigo_32k; hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_32k_STEP; - hStereoDft->win_ana = win_ana_32k; - hStereoDft->win = win_syn_32k; #ifdef IVAS_FLOAT_FIXED hStereoDft->dft_trigo_fx = dft_trigo_32k_fx; hStereoDft->win_ana_fx = win_ana_32k_fx; hStereoDft->win_fx = 
win_syn_32k_fx; +#else + hStereoDft->dft_trigo = dft_trigo_32k; + hStereoDft->win_ana = win_ana_32k; + hStereoDft->win = win_syn_32k; #endif } else { assert( input_Fs == 48000 ); - hStereoDft->dft_trigo = dft_trigo_48k; hStereoDft->dft_trigo_step = STEREO_DFT_TRIGO_SRATE_48k_STEP; - hStereoDft->win_ana = win_ana_48k; - hStereoDft->win = win_syn_48k; #ifdef IVAS_FLOAT_FIXED hStereoDft->dft_trigo_fx = dft_trigo_48k_fx; hStereoDft->win_ana_fx = win_ana_48k_fx; hStereoDft->win_fx = win_syn_48k_fx; +#else + hStereoDft->dft_trigo = dft_trigo_48k; + hStereoDft->win_ana = win_ana_48k; + hStereoDft->win = win_syn_48k; + #endif } - hStereoDft->win_mdct_8k = win_mdct_8k; #ifdef IVAS_FLOAT_FIXED hStereoDft->win_mdct_8k_fx = win_mdct_8k_fx; +#else + hStereoDft->win_mdct_8k = win_mdct_8k; #endif - +#ifdef IVAS_FLOAT_FIXED /*I/O Buffers*/ set_zero( hStereoDft->output_mem_dmx, STEREO_DFT_OVL_MAX ); set_zero( hStereoDft->output_mem_dmx_12k8, STEREO_DFT_OVL_12k8 ); set_zero( hStereoDft->output_mem_dmx_16k, STEREO_DFT_OVL_16k ); set_zero( hStereoDft->output_mem_dmx_16k_shb, STEREO_DFT_OVL_16k ); set_zero( hStereoDft->output_mem_res_8k, STEREO_DFT_OVL_8k ); - +#endif #ifdef IVAS_FLOAT_FIXED hStereoDft->dft_trigo_8k_fx = dft_trigo_32k_fx; hStereoDft->dft_trigo_12k8_fx = dft_trigo_12k8_fx; @@ -958,7 +964,9 @@ void stereo_dft_enc_reset( { int16_t i; /*reset parameters*/ +#ifndef IVAS_FLOAT_FIXED set_zero( hStereoDft->side_gain, STEREO_DFT_ENC_DFT_NB * STEREO_DFT_BAND_MAX ); +#endif set_s( hStereoDft->side_gain_index_EC, 15, STEREO_DFT_BAND_MAX ); set_s( hStereoDft->side_gain_index_ECDiff, 0, STEREO_DFT_BAND_MAX ); set_s( hStereoDft->side_gain_index_ECprevious, 15, STEREO_DFT_BAND_MAX ); @@ -969,7 +977,9 @@ void stereo_dft_enc_reset( #else hStereoDft->side_gain_bitdiff_lp = STEREO_DFT_BITDIFF_INIT; #endif +#ifndef IVAS_FLOAT_FIXED set_zero( hStereoDft->gipd, STEREO_DFT_ENC_DFT_NB ); +#endif set_zero( hStereoDft->dot_prod_real_smooth, STEREO_DFT_BAND_MAX ); set_zero( 
hStereoDft->dot_prod_img_smooth, STEREO_DFT_BAND_MAX ); #ifdef IVAS_FLOAT_FIXED @@ -989,10 +999,12 @@ void stereo_dft_enc_reset( set_s( hStereoDft->res_pred_index_ECDiff, 0, STEREO_DFT_BAND_MAX ); set_s( hStereoDft->res_pred_index_ECprevious, 0, STEREO_DFT_BAND_MAX ); hStereoDft->res_pred_counter = 0; +#ifndef IVAS_FLOAT_FIXED set_zero( hStereoDft->past_nrgL, STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX ); set_zero( hStereoDft->past_nrgR, STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX ); set_zero( hStereoDft->past_dot_prod_real, STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX ); set_zero( hStereoDft->past_dot_prod_imag, STEREO_DFT_NRG_PAST_LEN * STEREO_DFT_BAND_MAX ); +#endif hStereoDft->nrg_past_pos = 0; hStereoDft->res_dmx_ratio_lt = 1.0f; @@ -1038,15 +1050,16 @@ void stereo_dft_enc_reset( hStereoDft->sum_dot_prod_img = 0.f; /*Coherence*/ +#ifndef IVAS_FLOAT_FIXED set_f( hStereoDft->xspec_smooth, 1.0f, STEREO_DFT_N_32k_ENC ); + hStereoDft->sid_gipd = 0; + hStereoDft->prev_sid_gipd = 0; +#endif set_f( hStereoDft->Spd_L_smooth, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); set_f( hStereoDft->Spd_R_smooth, 1.0f, STEREO_DFT_N_32k_ENC / 2 ); - hStereoDft->currentNumUpdates = 0; hStereoDft->expectedNumUpdates = FIXED_SID_RATE; hStereoDft->resetFrames = 0; - hStereoDft->sid_gipd = 0; - hStereoDft->prev_sid_gipd = 0; hStereoDft->prev_sid_no_ipd_flag = 1; hStereoDft->coh_fade_counter = 0; @@ -1539,7 +1552,7 @@ void stereo_dft_enc_destroy( * * DFT analysis on a 20ms frame *-------------------------------------------------------------------------*/ - +#ifndef IVAS_FLOAT_FIXED void stereo_dft_enc_analyze( Encoder_State **sts, /* i/o: encoder state structure */ const int16_t n_channels, /* i : number of input channels */ @@ -1667,8 +1680,7 @@ void stereo_dft_enc_analyze( pop_wmops(); return; } - -#ifdef IVAS_FLOAT_FIXED +#else void stereo_dft_enc_analyze_fx( Encoder_State **sts, /* i/o: encoder state structure */ const Word16 n_channels, /* i : number of input channels */ @@ -2285,7 
+2297,7 @@ Word32 stereo_dft_enc_synthesize_fx( pop_wmops(); return ( nrg_fx ); } -#endif +#else float stereo_dft_enc_synthesize( STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i/o: encoder stereo handle */ float *output, /* o : output synthesis */ @@ -2567,7 +2579,7 @@ float stereo_dft_enc_synthesize( pop_wmops(); return ( nrg ); } - +#endif /*------------------------------------------------------------------------- * stereo_dft_enc_process() diff --git a/lib_enc/ivas_stereo_icbwe_enc.c b/lib_enc/ivas_stereo_icbwe_enc.c index 676f60e54..6dc72ed1d 100644 --- a/lib_enc/ivas_stereo_icbwe_enc.c +++ b/lib_enc/ivas_stereo_icbwe_enc.c @@ -52,7 +52,7 @@ * * core switching reset of IC BWE memory *-------------------------------------------------------------------*/ - +#ifndef IVAS_FLOAT_FIXED static void ic_bwe_enc_reset( STEREO_ICBWE_ENC_HANDLE hStereoICBWE /* i/o: Stereo ICBWE handle */ ) @@ -74,7 +74,7 @@ static void ic_bwe_enc_reset( return; } - +#endif #ifdef IVAS_FLOAT_FIXED static void ic_bwe_enc_reset_fx( STEREO_ICBWE_ENC_HANDLE hStereoICBWE /* i/o: Stereo ICBWE handle */ @@ -82,22 +82,41 @@ static void ic_bwe_enc_reset_fx( { /* unscaled & scaled SHB synthesis memory */ set32_fx( hStereoICBWE->mem_lpc_shbsynth_nonref_fx, 0, LPC_SHB_ORDER ); - + hStereoICBWE->mem_lpc_shbsynth_nonref_e = 0; + move16(); /* inter-channel BWE spectral shape adj. 
*/ hStereoICBWE->prevSpecMapping_fx = 0; move32(); hStereoICBWE->prevgsMapping_fx = ONE_IN_Q31; move32(); + hStereoICBWE->prevgsMapping_e = 0; + move16(); set32_fx( &( hStereoICBWE->memShbSpecMapping_fx ), 0, 1 ); + hStereoICBWE->memShbSpecMapping_e = 0; + move16(); set32_fx( hStereoICBWE->memShbSpecXcorr_fx, 0, 6 ); + hStereoICBWE->memShbSpecXcorr_e = 0; + move16(); set32_fx( hStereoICBWE->memGsEnerMap_fx, ONE_IN_Q31, 2 ); + hStereoICBWE->memGsEnerMap_e = 0; + move16(); set32_fx( hStereoICBWE->mem_nrg_L_fx, 0, 2 ); + hStereoICBWE->mem_nrg_L_fx_e = 0; + move16(); set32_fx( hStereoICBWE->mem_nrg_R_fx, 0, 2 ); + hStereoICBWE->mem_nrg_R_fx_e = 0; + move16(); set32_fx( hStereoICBWE->mem_nrg_DMX_fx, 0, 2 ); + hStereoICBWE->mem_nrg_DMX_fx_e = 0; + move16(); hStereoICBWE->gDes_pastFrame_fx = ONE_IN_Q31; move32(); + hStereoICBWE->gDes_pastFrame_e = 0; + move16(); hStereoICBWE->icbweRefEner_fx = 0; move32(); + hStereoICBWE->icbweRefEner_fx_e = 0; + move16(); return; } @@ -108,7 +127,7 @@ static void ic_bwe_enc_reset_fx( * * Encode and apply the spectral shape mapping of ref to the non-ref channel *---------------------------------------------------------------------------*/ - +#ifndef IVAS_FLOAT_FIXED static int16_t ic_bwe_enc_specMapping( const float *shb_frame_target, /* i : target shb */ float *shb_synth_nonref, /* o : non-ref shb synth */ @@ -198,7 +217,7 @@ static int16_t ic_bwe_enc_specMapping( return idx; } - +#endif #ifdef IVAS_FLOAT_FIXED_CONVERSIONS static Word16 ic_bwe_enc_specMapping_ivas_fx( const Word32 *shb_frame_target_fx, /* i : target shb */ @@ -418,6 +437,7 @@ static Word16 ic_bwe_enc_specMapping_ivas_fx( *-----------------------------------------------------------------------*/ /*! r: return quant. 
index value */ +#ifndef IVAS_FLOAT_FIXED static int16_t ic_bwe_enc_gsMapping( const float relG_targ, /* i : gDes, rel gain target */ const float *shbSynth, /* i : ref synth signal */ @@ -465,7 +485,7 @@ static int16_t ic_bwe_enc_gsMapping( return idx; } - +#endif /*----------------------------------------------------------------------* * ic_bwe_gsMapping() * @@ -566,7 +586,7 @@ static Word16 ic_bwe_enc_gsMapping_ivas_fx( * * Estimate ICBWE parameters in DFT stereo *-----------------------------------------------------------------------*/ - +#ifndef IVAS_FLOAT_FIXED static void icbwe_dft_stereo_param( STEREO_ICBWE_ENC_HANDLE hStereoICBWE, /* i/o: */ STEREO_DFT_ENC_DATA_HANDLE hStereoDft, /* i : */ @@ -660,7 +680,7 @@ static void icbwe_dft_stereo_param( return; } - +#endif #ifdef IVAS_FLOAT_FIXED static void icbwe_dft_stereo_param_ivas_fx( STEREO_ICBWE_ENC_HANDLE hStereoICBWE, /* i/o: */ @@ -828,7 +848,7 @@ static void icbwe_dft_stereo_param_ivas_fx( return; } #endif - +#ifndef IVAS_FLOAT_FIXED void stereo_icBWE_enc( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ const float shb_speech_ref[], /* i : SHB speech ref channel */ @@ -1065,7 +1085,7 @@ void stereo_icBWE_enc( return; } - +#endif void stereo_icBWE_enc_ivas_fx( CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */ @@ -1588,32 +1608,15 @@ void stereo_icBWE_init_enc( set_f( hStereoICBWE->icbwe_inp_mem[0], 0, NS2SA( 48000, L_MEM_RECALC_TBE_NS ) ); set_f( hStereoICBWE->icbwe_inp_mem[1], 0, NS2SA( 48000, L_MEM_RECALC_TBE_NS ) ); #endif - +#ifndef IVAS_FLOAT_FIXED set_f( hStereoICBWE->mem_nrg_L, 0, 2 ); set_f( hStereoICBWE->mem_nrg_R, 0, 2 ); set_f( hStereoICBWE->mem_nrg_DMX, 0, 2 ); +#endif hStereoICBWE->gDes_pastFrame = 1.0f; hStereoICBWE->icbweRefEner = 0.0f; hStereoICBWE->MSFlag = 0; - -#ifdef IVAS_FLOAT_FIXED - set32_fx( hStereoICBWE->mem_nrg_L_fx, 0, 2 ); - set32_fx( hStereoICBWE->mem_nrg_R_fx, 0, 2 ); - set32_fx( hStereoICBWE->mem_nrg_DMX_fx, 0, 2 ); - hStereoICBWE->gDes_pastFrame_fx = MAX_32; // Q31 
- hStereoICBWE->gDes_pastFrame_e = 0; // Q31 - hStereoICBWE->icbweRefEner_fx = 0; - hStereoICBWE->prevgsMapping_fx = MAX_32; - hStereoICBWE->prevgsMapping_e = 0; - move32(); - move32(); - move32(); - move16(); - move16(); - -#endif - return; } @@ -1662,13 +1665,27 @@ void stereo_icBWE_init_enc_fx( set16_fx( hStereoICBWE->memModifyFs_icbwe_fx[1], 0, shl( L_FILT32k, 1 ) ); set32_fx( hStereoICBWE->mem_nrg_L_fx, 0, 2 ); + hStereoICBWE->mem_nrg_L_fx_e = 0; + move16(); set32_fx( hStereoICBWE->mem_nrg_R_fx, 0, 2 ); + hStereoICBWE->mem_nrg_R_fx_e = 0; + move16(); set32_fx( hStereoICBWE->mem_nrg_DMX_fx, 0, 2 ); + hStereoICBWE->mem_nrg_DMX_fx_e = 0; + move16(); + hStereoICBWE->gDes_pastFrame_fx = MAX_32; // Q31 hStereoICBWE->icbweRefEner_fx = 0; move32(); move32(); - + hStereoICBWE->gDes_pastFrame_fx = MAX_32; // Q31 + move32(); + hStereoICBWE->gDes_pastFrame_e = 0; // Q31 + move16(); + hStereoICBWE->prevgsMapping_fx = MAX_32; + move32(); + hStereoICBWE->prevgsMapping_e = 0; + move16(); hStereoICBWE->MSFlag = 0; move16(); diff --git a/lib_enc/ivas_stereo_switching_enc.c b/lib_enc/ivas_stereo_switching_enc.c index da6aeb8c7..deb108fae 100644 --- a/lib_enc/ivas_stereo_switching_enc.c +++ b/lib_enc/ivas_stereo_switching_enc.c @@ -369,8 +369,10 @@ ivas_error stereo_memory_enc_fx( { return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Stereo ICBWE \n" ) ); } - - stereo_icBWE_init_enc( hCPE->hStereoICBWE ); +#ifdef IVAS_FLOAT_FIXED + stereo_icBWE_init_enc( hCPE->hStereoICBWE ); //To be removed +#endif + stereo_icBWE_init_enc_fx( hCPE->hStereoICBWE ); } /* allocate HQ core in M channel */ diff --git a/lib_enc/speech_music_classif.c b/lib_enc/speech_music_classif.c index 24beb8445..15c13f0af 100644 --- a/lib_enc/speech_music_classif.c +++ b/lib_enc/speech_music_classif.c @@ -1148,756 +1148,7 @@ static int16_t attack_det( *---------------------------------------------------------------------*/ /*! 
r: S/M decision (0=speech or noise,1=unclear,2=music) */ -#ifdef IVAS_FLOAT_FIXED -int16_t ivas_smc_gmm( - Encoder_State *st, /* i/o: state structure */ - STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */ - const int16_t localVAD_HE_SAD, /* i : HE-SAD flag without hangover */ - const float Etot, /* i : total frame energy */ - const float lsp_new[M], /* i : LSPs in current frame */ - const float cor_map_sum, /* i : correlation map sum (from multi-harmonic anal.) */ - const float epsP[M + 1], /* i : LP prediciton error */ - const float PS[], /* i : energy spectrum */ - const float non_sta, /* i : unbound non-stationarity */ - const float relE, /* i : relative frame energy */ - int16_t *high_lpn_flag, /* i/o: sp/mus LPN flag */ - const int16_t flag_spitch /* i : flag to indicate very short stable pitch */ -) -{ - int16_t i, m, dec; - int16_t flag_odv; - float lps, lpm, lpn; - float ps[N_SMC_MIXTURES], pm[N_SMC_MIXTURES], pn[N_SMC_MIXTURES]; - float fvm[N_PCA_COEF], lprob; - float dlp, ftmp, sum_PS, ps_diff, ps_sta, wrelE, wdrop, wght; - float wrise; - float dlp_mean2var; - float FV[N_SMC_FEATURES], *pFV, PS_norm[128], dPS[128]; - const float *pODV; - float *pFV_st, smc_st_mean_fact; - int16_t relE_attack_flag; - int16_t j, len; - const float *pt_mel_fb; - float melS[NB_MEL_BANDS], mfcc[NB_MEL_BANDS]; - int16_t odv_cnt; - int16_t i_out[N_SMC_FEATURES], *p_out; - - /*------------------------------------------------------------------* - * Initialization - *------------------------------------------------------------------*/ - - SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; - - /*------------------------------------------------------------------* - * State machine (sp_mus_state: -8 = INACTIVE, -7:-1 = UNSTABLE, 0:7 = ENTRY, 8 = STABLE ) - *------------------------------------------------------------------*/ - - if ( localVAD_HE_SAD ) - { - if ( relE < -20 ) - { - if ( hSpMusClas->sp_mus_state > 0 ) - { - if ( hSpMusClas->sp_mus_state < 
HANG_LEN ) - { - /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */ - hSpMusClas->inact_cnt = 0; - } - - /* energy is too low -> we are going to instable state */ - hSpMusClas->sp_mus_state = 0; - } - else if ( hSpMusClas->sp_mus_state > -HANG_LEN ) - { - /* energy is still too low -> we are still in instable state */ - hSpMusClas->sp_mus_state--; - } - } - else if ( hSpMusClas->sp_mus_state <= 0 ) - { - if ( hSpMusClas->inact_cnt == 0 ) - { - - hSpMusClas->sp_mus_state = 1; - } - else - { - - hSpMusClas->sp_mus_state = HANG_LEN; - } - - hSpMusClas->inact_cnt = 12; - } - else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) - { - /* we are inside an entry period -> increment the counter of entry frames */ - hSpMusClas->sp_mus_state++; - } - - if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 ) - { - hSpMusClas->inact_cnt--; - } - } - else - { - if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) - { - hSpMusClas->inact_cnt = 0; - } - else if ( hSpMusClas->inact_cnt > 0 ) - { - hSpMusClas->inact_cnt--; - } - - if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) - { - hSpMusClas->sp_mus_state = -HANG_LEN; - } - else if ( hSpMusClas->sp_mus_state > 0 ) - { - hSpMusClas->sp_mus_state = -1; - } - else if ( hSpMusClas->sp_mus_state > -HANG_LEN ) - { - /* we are in inactive state */ - hSpMusClas->sp_mus_state--; - } - } - - /* detect attacks based on relE */ - if ( relE > hSpMusClas->prev_relE ) - { - hSpMusClas->relE_attack_sum += relE - hSpMusClas->prev_relE; - } - else - { - hSpMusClas->relE_attack_sum = 0; - } - hSpMusClas->prev_relE = relE; - - /* update counter from last VAD 0->1 change */ - if ( hSpMusClas->prev_vad == 0 && localVAD_HE_SAD == 1 ) - { - hSpMusClas->vad_0_1_cnt = 1; - } - else if ( localVAD_HE_SAD == 1 && hSpMusClas->vad_0_1_cnt > 0 && hSpMusClas->vad_0_1_cnt < 50 ) - { - hSpMusClas->vad_0_1_cnt++; - } - else - { 
- hSpMusClas->vad_0_1_cnt = 0; - } - hSpMusClas->prev_vad = localVAD_HE_SAD; - - if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && hSpMusClas->relE_attack_sum > 5.0f ) - { - hSpMusClas->relE_attack_cnt++; - - /* set flag only in the first X frames in a series */ - if ( hSpMusClas->relE_attack_cnt > 0 && hSpMusClas->relE_attack_cnt < 3 ) - { - relE_attack_flag = 1; - } - else - { - relE_attack_flag = 0; - } - } - else - { - hSpMusClas->relE_attack_cnt = 0; - relE_attack_flag = 0; - } - - hSpMusClas->prev_Etot = Etot; - - /*------------------------------------------------------------------* - * Preparation of the feature vector - *------------------------------------------------------------------*/ - - pFV = FV; - - /* [0] OL pitch */ - if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 ) - { - *pFV++ = (float) st->pitch[2]; - } - else - { - *pFV++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f; - } - - /* [1] voicing */ - if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 ) - { - *pFV++ = st->voicing[2]; - } - else - { - *pFV++ = ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f; - } - - /* [2,3,4,5,6] LSFs */ - *pFV++ = acosf( lsp_new[2] ); - *pFV++ = acosf( lsp_new[3] ); - *pFV++ = acosf( lsp_new[4] ); - *pFV++ = acosf( lsp_new[5] ); - *pFV++ = acosf( lsp_new[6] ); - - /* [7] cor_map_sum */ - *pFV++ = cor_map_sum; - - /* [8] non_sta */ - *pFV++ = non_sta; - - /* [9] epsP */ - *pFV++ = logf( epsP[14] + 1e-5f ) - logf( epsP[0] + 1e-5f ); - - /* [10,11,12] MFCCs */ - set_zero( melS, NB_MEL_BANDS ); - pt_mel_fb = mel_fb; - for ( i = 0; i < NB_MEL_BANDS; i++ ) - { - j = mel_fb_start[i]; - len = mel_fb_len[i]; - melS[i] = logf( dotp( &PS[j], pt_mel_fb, len ) + 1e-5f ); - pt_mel_fb += len; - } - -#ifdef IVAS_FLOAT_FIXED - //////////////////// to be removed ////////////////////// - Word32 y_fx[NB_MEL_BANDS]; - Word32 x_fx[NB_MEL_BANDS]; - Word32 A_fx[NB_MEL_BANDS * NB_MEL_COEF]; - Word16 
y_q_fx[NB_MEL_BANDS]; - Word16 x_q_fx[NB_MEL_BANDS]; - Word16 A_q_fx[NB_MEL_BANDS * NB_MEL_COEF]; - Word32 *pt_x_fx, *pt_A_fx; - const Float32 *pt_x, *pt_A; - Word16 *pt_x_q_fx, *pt_A_q_fx; - - pt_A_fx = A_fx; - pt_A_q_fx = A_q_fx; - pt_A = dct_mtx; - - FOR( i = 0; i < NB_MEL_COEF; i++ ) - { - pt_x = melS; - pt_x_fx = x_fx; - pt_x_q_fx = x_q_fx; - FOR( j = 0; j < NB_MEL_BANDS; j++ ) - { - IF( EQ_16( i, 0 ) ) - { - *pt_x_q_fx = sub( Q_factor_L( *pt_x ), 3 ); - *pt_x_fx++ = (Word32) ( *pt_x++ * ( W_shl( 1, *pt_x_q_fx++ ) ) ); - } - *pt_A_q_fx = sub( Q_factor_L( *pt_A ), 3 ); - *pt_A_fx++ = (Word32) ( *pt_A++ * ( W_shl( 1, *pt_A_q_fx++ ) ) ); - } - } - - v_mult_mat_fx( y_fx, y_q_fx, (const Word32 *) x_fx, x_q_fx, (const Word32 *) A_fx, A_q_fx, NB_MEL_BANDS, NB_MEL_COEF ); - - ////////////////////////////// to be removed //////////////////////// - FOR( i = 0; i < NB_MEL_COEF; i++ ) - { - IF( LT_16( y_q_fx[i], 0 ) ) - { - mfcc[i] = (Float32) y_fx[i] * W_shl( 1, -y_q_fx[i] ); - } - ELSE - { - mfcc[i] = (Float32) y_fx[i] / W_shl( 1, y_q_fx[i] ); - } - } - //////////////////////////////////////////////////////////////////// -#else - v_mult_mat( mfcc, melS, dct_mtx, NB_MEL_BANDS, NB_MEL_COEF ); -#endif - - *pFV++ = mfcc[2]; - *pFV++ = mfcc[6]; - *pFV++ = mfcc[12]; - - /* calculation of differential normalized power spectrum */ - sum_PS = 1e-5f; - for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ ) - { - sum_PS += PS[i]; - } - - for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ ) - { - PS_norm[i] = PS[i] / sum_PS; - dPS[i] = fabsf( PS_norm[i] - hSpMusClas->past_PS[i - LOWEST_FBIN] ); - } - - /* [13] ps_diff (spectral difference) */ - ps_diff = 0; - for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ ) - { - ps_diff += dPS[i]; - } - - *pFV++ = ps_diff; - - /* [14] ps_sta (spectral stationarity) */ - ps_sta = 0; - for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ ) - { - if ( PS_norm[i] > hSpMusClas->past_PS[i - LOWEST_FBIN] ) - { - ps_sta += PS_norm[i] / ( dPS[i] + 1e-5f ); - } - else - { - ps_sta 
+= hSpMusClas->past_PS[i - LOWEST_FBIN] / ( dPS[i] + 1e-5f ); - } - } - - *pFV++ = logf( ps_sta + 1e-5f ); - mvr2r( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS, HIGHEST_FBIN - LOWEST_FBIN ); - - /* save ps_diff and ps_sta features for XTALK and UNCLR classifier */ - if ( hStereoClassif != NULL ) - { - if ( st->idchan == 0 ) - { - hStereoClassif->ps_diff_ch1 = ps_diff; - hStereoClassif->ps_sta_ch1 = logf( ps_sta + 1e-5f ); - } - else - { - hStereoClassif->ps_diff_ch2 = ps_diff; - hStereoClassif->ps_sta_ch2 = logf( ps_sta + 1e-5f ); - } - } - - /*------------------------------------------------------------------* - * Outlier detection based on feature histograms - *------------------------------------------------------------------*/ - - flag_odv = 0; - if ( localVAD_HE_SAD ) - { - pFV = FV; - pODV = hout_intervals; - p_out = i_out; - odv_cnt = 0; - for ( i = 0; i < N_SMC_FEATURES; i++ ) - { - if ( *pFV < pODV[0] || *pFV > pODV[1] ) - { - *p_out++ = i; - odv_cnt++; - } - - pFV++; - pODV += 2; - } - - /* set outlier flag */ - if ( odv_cnt >= 2 ) - { - flag_odv = 1; - - /* replace outlying features with values from the previous frame */ - for ( i = 0; i < odv_cnt; i++ ) - { - FV[i_out[i]] = hSpMusClas->prev_FV[i_out[i]]; - } - } - } - - /*------------------------------------------------------------------* - * Adaptive short-term mean filter on feature vector - *------------------------------------------------------------------*/ - - pFV = FV; - pFV_st = hSpMusClas->FV_st; - smc_st_mean_fact = SMC_ST_MEAN_FACT; - for ( i = 0; i < N_SMC_FEATURES; i++ ) - { - *pFV_st = smc_st_mean_fact * ( *pFV_st ) + ( 1 - smc_st_mean_fact ) * ( *pFV ); - - if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) ) - { - /* strong attack or outlier frame during entry state -> features cannot be trusted but there is also no useful past info -> */ - /* -> do whatever you want because dlp will be reset to 0 anyway */ - pFV++; - pFV_st++; - } 
- else if ( hSpMusClas->sp_mus_state == HANG_LEN && ( st->tc_cnt == 1 || st->tc_cnt == 2 ) ) - { - /* energy attack in stable state -> use current features intead of the long-term average */ - pFV++; - pFV_st++; - } - else - { - *pFV++ = *pFV_st++; - } - } - - /* update */ - mvr2r( FV, hSpMusClas->prev_FV, N_SMC_FEATURES ); - - /*------------------------------------------------------------------* - * Non-linear power transformation (boxcox) on certain features - *------------------------------------------------------------------*/ - - pFV = FV; - for ( i = 0; i < N_SMC_FEATURES; i++ ) - { - if ( bcox_lmbd[i] != 0 ) - { - *pFV -= bcox_add_cnst[i]; - if ( *pFV < 1 ) - { - *pFV = 1; - } - *pFV = ( powf( *pFV, bcox_lmbd[i] ) - 1 ) / bcox_lmbd[i]; - } - - pFV++; - } - - /*------------------------------------------------------------------* - * Scaling of the feature vector - * PCA - *------------------------------------------------------------------*/ - - pFV = FV; - for ( i = 0; i < N_SMC_FEATURES; i++ ) - { - /* Standard scaler - mean and variance normalization */ - *pFV = ( *pFV - sm_means[i] ) / sm_scale[i]; - pFV++; - - /* MinMax sclaer - mean and variance normalization */ - /**pFV = *pFV * sm_scale[i] + sm_min[i];*/ - /*pFV++;*/ - } - - /* PCA */ - v_sub( FV, pca_mean_, FV, N_SMC_FEATURES ); - v_mult_mat( FV, FV, pca_components_, N_SMC_FEATURES, N_PCA_COEF ); - - /*------------------------------------------------------------------* - * Calculation of posterior probability - * Log-probability - *------------------------------------------------------------------*/ - -#ifdef IVAS_FLOAT_FIXED - Word32 fvm_fx[N_PCA_COEF], lprob_fx; - Word16 fvm_q, guard_bits; - guard_bits = find_guarded_bits_fx( N_PCA_COEF ); // 12! 
479001600 -#endif - /* run loop for all mixtures (for each mixture, calculate the probability of speech, music and noise) */ - lps = lpm = lpn = 0; - for ( m = 0; m < N_SMC_MIXTURES; m++ ) - { - v_sub( FV, &means_speech[m * N_PCA_COEF], fvm, N_PCA_COEF ); - -#ifdef IVAS_FLOAT_FIXED - f2me_buf( fvm, fvm_fx, &fvm_q, N_PCA_COEF ); - fvm_q = sub( 31, fvm_q ); - scale_sig32( fvm_fx, N_PCA_COEF, sub( sub( fvm_q, guard_bits ), fvm_q ) ); - fvm_q = sub( fvm_q, guard_bits ); - - lprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); - lprob = fixedToFloat( lprob_fx, sub( 2 * fvm_q, 37 ) ); -#else - lprob = dot_product_cholesky( fvm, &prec_chol_speech[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); -#endif - ps[m] = logf( weights_speech[m] ) + log_det_chol_speech[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob; - - v_sub( FV, &means_music[m * N_PCA_COEF], fvm, N_PCA_COEF ); -#ifdef IVAS_FLOAT_FIXED - f2me_buf( fvm, fvm_fx, &fvm_q, N_PCA_COEF ); - fvm_q = sub( 31, fvm_q ); - scale_sig32( fvm_fx, N_PCA_COEF, sub( sub( fvm_q, guard_bits ), fvm_q ) ); - fvm_q = sub( fvm_q, guard_bits ); - lprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); - lprob = fixedToFloat( lprob_fx, sub( 2 * fvm_q, 37 ) ); -#else - lprob = dot_product_cholesky( fvm, &prec_chol_music[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); -#endif - pm[m] = logf( weights_music[m] ) + log_det_chol_music[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob; - - v_sub( FV, &means_noise[m * N_PCA_COEF], fvm, N_PCA_COEF ); -#ifdef IVAS_FLOAT_FIXED - f2me_buf( fvm, fvm_fx, &fvm_q, N_PCA_COEF ); - fvm_q = sub( 31, fvm_q ); - scale_sig32( fvm_fx, N_PCA_COEF, sub( sub( fvm_q, guard_bits ), fvm_q ) ); - fvm_q = sub( fvm_q, guard_bits ); - lprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF + 
N_PCA_COEF ) / 2], N_PCA_COEF ); - lprob = fixedToFloat( lprob_fx, sub( 2 * fvm_q, 37 ) ); -#else - lprob = dot_product_cholesky( fvm, &prec_chol_noise[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); -#endif - pn[m] = logf( weights_noise[m] ) + log_det_chol_noise[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob; - } - - lps = logsumexp( ps, N_SMC_MIXTURES ); - lpm = logsumexp( pm, N_SMC_MIXTURES ); - lpn = logsumexp( pn, N_SMC_MIXTURES ); - - *high_lpn_flag = 0; - if ( lpn > lps && lpn > lpm ) - { - *high_lpn_flag = 1; - } - - hSpMusClas->lpm = lpm; - hSpMusClas->lps = lps; - hSpMusClas->lpn = lpn; - - /* determine HQ Generic speech class */ - if ( st->hHQ_core != NULL ) - { - if ( lps > lpm + 0.5f ) - { - st->hHQ_core->hq_generic_speech_class = 1; - } - else - { - st->hHQ_core->hq_generic_speech_class = 0; - } - } - - /*------------------------------------------------------------------* - * Decision without hangover - * Weighted decision - *------------------------------------------------------------------*/ - - /* decision without hangover (0 - speech/noise, 1 - music) */ - if ( !localVAD_HE_SAD || Etot < 10 || ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) ) ) - { - dlp = 0; - } - else - { - dlp = lpm - lps + DLP_BIAS; - - if ( dlp > 30.0f ) - { - dlp = 30.0f; - } - else if ( dlp < -30.0f ) - { - dlp = -30.0f; - } - } - - dec = dlp > 0; - - /* calculate weight based on relE (higher relE -> lower weight, lower relE -> higher weight) */ - wrelE = lin_interp( relE, 15.0f, 0.9f, -15.0f, 0.99f, 1 ); - - /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */ - hSpMusClas->dlp_mean_ST = 0.8f * hSpMusClas->dlp_mean_ST + 0.2f * dlp; - hSpMusClas->lt_dec_thres = hSpMusClas->dlp_mean_ST; - - if ( dlp < 0 && dlp < hSpMusClas->dlp_mean_ST ) - { - if ( hSpMusClas->dlp_mean_ST > 0 ) - { - hSpMusClas->wdrop = -dlp; - } - else if ( hSpMusClas->wdrop 
> 0 ) - { - hSpMusClas->wdrop += hSpMusClas->dlp_mean_ST - dlp; - } - } - else - { - hSpMusClas->wdrop = 0; - } - - wdrop = lin_interp( hSpMusClas->wdrop, 15.0f, 0.7f, 0.0f, 1.0f, 1 ); - - /* calculate weight based on rises of dlp (close to 1 during sudden rise of dlp, close to 0 otherwise) */ - if ( hSpMusClas->sp_mus_state == HANG_LEN && hSpMusClas->dlp_mean_ST > 0 && hSpMusClas->dlp_mean_ST > hSpMusClas->past_dlp_mean_ST[0] ) - { - if ( hSpMusClas->past_dlp_mean_ST[0] < 0 ) - { - hSpMusClas->wrise = hSpMusClas->dlp_mean_ST; - } - else if ( hSpMusClas->wrise > 0 ) - { - hSpMusClas->wrise += hSpMusClas->dlp_mean_ST - hSpMusClas->past_dlp_mean_ST[0]; - } - } - else - { - hSpMusClas->wrise = 0; - } - - wrise = lin_interp( hSpMusClas->wrise, 5.0f, 0.95f, 0.0f, 1.0f, 1 ); - - /* combine weights into one */ - wght = wrelE * wdrop * wrise; - - /* ratio of delta means vs. delta variances */ - if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) - { - hSpMusClas->dlp_mean_LT = dlp; - hSpMusClas->dlp_var_LT = 0; - } - - hSpMusClas->dlp_mean_LT = 0.9f * hSpMusClas->dlp_mean_LT + 0.1f * dlp; - ftmp = dlp - hSpMusClas->dlp_mean_LT; - hSpMusClas->dlp_var_LT = 0.9f * hSpMusClas->dlp_var_LT + 0.1f * ( ftmp * ftmp ); - - if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) - { - dlp_mean2var = 0; - } - else - { - dlp_mean2var = fabsf( hSpMusClas->dlp_mean_LT ) / ( sqrtf( fabsf( hSpMusClas->dlp_var_LT ) ) + 1.0f ); - } - - if ( dlp_mean2var > 15.0f ) - { - /* decrease the weight little bit when the classifier indicates "strong speech" or "strong music" */ - wght *= 0.9f; - } - - if ( wght > 1.0f ) - { - wght = 1.0f; - } - else if ( wght < 0.01f ) - { - wght = 0.01f; - } - - if ( Etot < 10 ) - { - /* silence */ - wght = 0.92f; - } - - /* calculate weighted decision */ - hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp; - // printf( "\n%f ", hSpMusClas->wdlp_0_95_sp ); - - /* xtalk classifier: apply long 
hysteresis to prevent LRTD on music */ - hSpMusClas->wdlp_xtalk = 0.995f * hSpMusClas->wdlp_xtalk + 0.005f * dlp; - - /*------------------------------------------------------------------* - * Final speech/music decision - *------------------------------------------------------------------*/ - - if ( flag_spitch ) - { - hSpMusClas->flag_spitch_cnt = 5; - } - else if ( hSpMusClas->flag_spitch_cnt > 0 ) - { - hSpMusClas->flag_spitch_cnt--; - } - - if ( Etot < 10 ) - { - /* silence */ - dec = 0; - } - else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) - { - /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */ - ftmp = w_spmus[hSpMusClas->sp_mus_state - 1][0] * dlp; - ftmp += dotp( &w_spmus[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp, HANG_LEN - 1 ); - if ( ftmp > 2.0f ) - { - if ( dlp > 2.0f ) - { - dec = 2; - } - else - { - dec = 1; - } - } - else - { - dec = 0; - } - } - else - { - /* stable active state */ - if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 && - ( ( hSpMusClas->flag_spitch_cnt > 0 && hSpMusClas->wdlp_0_95_sp > 3.4f ) || ( hSpMusClas->flag_spitch_cnt == 0 && hSpMusClas->wdlp_0_95_sp > 2.1f ) ) ) - { - /* switching from speech to unclear */ - dec = 1; - } - else if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->vad_0_1_cnt < 50 && hSpMusClas->relE_attack_sum == 0.0f && hSpMusClas->wdlp_0_95_sp > 1.0f ) - { - /* switch from speech to unclear also during slowly rising weak music onsets */ - dec = 1; - } - else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp > 2.5f ) - { - /* switching from unclear to music */ - dec = 2; - } - else if ( hSpMusClas->past_dec[0] == 2 && hSpMusClas->past_dec[1] == 2 && hSpMusClas->past_dec[2] == 2 && hSpMusClas->wdlp_0_95_sp < -1.0f ) - { - /* switching from music to unclear */ - dec = 1; - } - else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp < -2.5f ) - { 
- /* switching from unclear to speech */ - dec = 0; - } - else - { - dec = hSpMusClas->past_dec[0]; - } - } - - /*------------------------------------------------------------------* - * raw S/M decision based on smoothed GMM score - *------------------------------------------------------------------*/ - - if ( dec == 0 || st->hSpMusClas->wdlp_0_95_sp <= 0 ) - { - st->sp_aud_decision0 = 0; - st->sp_aud_decision1 = 0; - } - else - { - st->sp_aud_decision0 = 1; - st->sp_aud_decision1 = 1; - } - - /*------------------------------------------------------------------* - * Updates - *------------------------------------------------------------------*/ - - /* update buffer of past non-binary decisions */ - mvr2r( &hSpMusClas->past_dlp[0], &hSpMusClas->past_dlp[1], HANG_LEN - 2 ); - hSpMusClas->past_dlp[0] = dlp; - - mvr2r( &hSpMusClas->past_dlp_mean_ST[0], &hSpMusClas->past_dlp_mean_ST[1], HANG_LEN - 2 ); - hSpMusClas->past_dlp_mean_ST[0] = hSpMusClas->dlp_mean_ST; - - /* update buffer of past binary decisions */ - mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 ); - hSpMusClas->past_dec[0] = dec; - - - return dec; -} -#else +#ifndef IVAS_FLOAT_FIXED int16_t ivas_smc_gmm( Encoder_State *st, /* i/o: state structure */ STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */ diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index 53f942deb..21cddef75 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -1812,12 +1812,12 @@ Word16 ivas_smc_gmm_fx( move32(); /* [9] epsP */ - temp32 = L_add( epsP_fx[14], L_shr( 21474, sub( 31, Q_esp ) ) ); + temp32 = L_add( epsP_fx[14], L_shr( 21475, sub( 31, Q_esp ) ) ); move32(); temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( sub( Q31, Q_esp ), Q25 ) ); temp32_log1 = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/ - temp32 = L_add( epsP_fx[0], L_shr( 21474, sub( 31, Q_esp ) ) ); + temp32 = L_add( 
epsP_fx[0], L_shr( 21475, sub( 31, Q_esp ) ) ); move32(); temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( sub( Q31, Q_esp ), Q25 ) ); temp32_log2 = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/ @@ -1839,7 +1839,13 @@ Word16 ivas_smc_gmm_fx( len = mel_fb_len[i]; move16(); temp32 = dotp_me_fx( &PS_fx[j], pt_mel_fb_fx, len, 31 - Qfact_PS, Q1, &dotp_exp ); - temp32_log = L_add( BASOP_Util_Log2( temp32 ), L_shl( dotp_exp, Q25 ) ); + IF( LT_16( dotp_exp, -17 ) ) /*-18 is exponent of 10737:to avoid overflow when left shifting 10737*/ + { + temp32 = L_shr( temp32, sub( -17, dotp_exp ) ); + dotp_exp = -17; + move16(); + } + temp32_log = L_add( BASOP_Util_Log2( L_add( L_shr( temp32, 1 ), L_shr( 10737 /*1e-5f q30*/, dotp_exp ) ) ), L_shl( add( dotp_exp, 1 ), Q25 ) ); temp32_log = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/ melS_fx[i] = temp32_log; move32(); @@ -1895,18 +1901,18 @@ Word16 ivas_smc_gmm_fx( { IF( GT_32( PS_norm_fx[i], hSpMusClas->past_PS_fx[i - LOWEST_FBIN] ) ) { - temp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( PS_norm_fx[i], ( dPS_fx[i] + 1 ), &temp_exp ) ); // 31-temp_exp + temp32 = BASOP_Util_Divide3232_Scale_cadence( PS_norm_fx[i], ( L_add( dPS_fx[i], L_shr( 21475, sub( 31, Qfact_PS_past ) ) ) ), &temp_exp ); // 31-temp_exp ps_sta_fx = BASOP_Util_Add_Mant32Exp( temp32, temp_exp, ps_sta_fx, ps_sta_exp, &ps_sta_exp ); } ELSE { // ps_sta += hSpMusClas->past_PS[i - LOWEST_FBIN] / ( dPS[i] + 1e-5f ); - temp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( hSpMusClas->past_PS_fx[i - LOWEST_FBIN], ( dPS_fx[i] + 1 ), &temp_exp ) ); // 31-temp_exp + temp32 = BASOP_Util_Divide3232_Scale_cadence( hSpMusClas->past_PS_fx[i - LOWEST_FBIN], ( L_add( dPS_fx[i], L_shr( 21475, sub( 31, Qfact_PS_past ) ) ) ), &temp_exp ); // 31-temp_exp move32(); ps_sta_fx = BASOP_Util_Add_Mant32Exp( temp32, temp_exp, ps_sta_fx, ps_sta_exp, &ps_sta_exp ); } } - temp32_log = L_add( BASOP_Util_Log2( ps_sta_fx ), L_shl( ps_sta_exp, Q25 ) ); + 
temp32_log = L_add( BASOP_Util_Log2( L_add( ps_sta_fx, L_shr( 21475, ps_sta_exp ) ) ), L_shl( ps_sta_exp, Q25 ) ); temp32_log = Mpy_32_32( temp32_log, 1488522239 ); /*logf(x) = log2(x)*logf(2)*/ *pFV_fx++ = L_shr( temp32_log, Q5 ); // logf( ps_sta + 1e-5f ); move32(); @@ -2304,7 +2310,7 @@ Word16 ivas_smc_gmm_fx( wght_fx = 82; } move32(); - if ( LT_16( Etot_fx, 256 ) ) + if ( LT_16( Etot_fx, 2560 ) ) { /* silence */ wght_fx = 7537; @@ -2313,8 +2319,8 @@ Word16 ivas_smc_gmm_fx( /* calculate weighted decision */ // hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp; - hSpMusClas->wdlp_0_95_sp_fx = extract_l( L_add( L_shl( Mpy_32_16_1( wght_fx, hSpMusClas->wdlp_0_95_sp_fx ), Q2 ), Mpy_32_32( L_shl( L_sub( ONE_IN_Q13, wght_fx ), Q7 ), dlp_fx ) ) ); // Q8 - move16(); + hSpMusClas->wdlp_0_95_sp_32fx = L_add( Mpy_32_32( L_shl( wght_fx /*q13*/, 18 ), hSpMusClas->wdlp_0_95_sp_32fx /*q24*/ ), Mpy_32_32( L_shl( L_sub( ONE_IN_Q13, wght_fx /*q13*/ ), Q18 ), L_shl( dlp_fx /*q19*/, 5 ) ) ); // Q24 + move32(); /* xtalk classifier: apply long hysteresis to prevent LRTD on music */ @@ -2381,27 +2387,27 @@ Word16 ivas_smc_gmm_fx( test(); /* stable active state */ IF( hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 && - ( ( hSpMusClas->flag_spitch_cnt > 0 && GT_16( hSpMusClas->wdlp_0_95_sp_fx, 870 ) ) || ( hSpMusClas->flag_spitch_cnt == 0 && GT_16( hSpMusClas->wdlp_0_95_sp_fx, 538 ) ) ) ) + ( ( hSpMusClas->flag_spitch_cnt > 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 57042534 /*3.4*(2^24)*/ ) ) || ( hSpMusClas->flag_spitch_cnt == 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 35232154 /*2.1*(2^24)*/ ) ) ) ) { /* switching from speech to unclear */ dec = 1; } - ELSE IF( hSpMusClas->past_dec[0] == 0 && LT_16( hSpMusClas->vad_0_1_cnt, 50 ) && hSpMusClas->relE_attack_sum_fx == 0 && GT_16( hSpMusClas->wdlp_0_95_sp_fx, 256 ) ) + ELSE IF( hSpMusClas->past_dec[0] == 0 && LT_16( hSpMusClas->vad_0_1_cnt, 50 ) && 
hSpMusClas->relE_attack_sum_fx == 0 && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 1 << 24 ) ) { /* switch from speech to unclear also during slowly rising weak music onsets */ dec = 1; } - ELSE IF( EQ_16( hSpMusClas->past_dec[0], 1 ) && GT_16( hSpMusClas->wdlp_0_95_sp_fx, 640 ) ) + ELSE IF( EQ_16( hSpMusClas->past_dec[0], 1 ) && GT_32( hSpMusClas->wdlp_0_95_sp_32fx, 41943040 /*2.5*2^24*/ ) ) { /* switching from unclear to music */ dec = 2; } - ELSE IF( EQ_16( hSpMusClas->past_dec[0], 2 ) && EQ_16( hSpMusClas->past_dec[1], 2 ) && EQ_16( hSpMusClas->past_dec[2], 2 ) && LT_16( hSpMusClas->wdlp_0_95_sp_fx, -256 ) ) + ELSE IF( EQ_16( hSpMusClas->past_dec[0], 2 ) && EQ_16( hSpMusClas->past_dec[1], 2 ) && EQ_16( hSpMusClas->past_dec[2], 2 ) && LT_32( hSpMusClas->wdlp_0_95_sp_32fx, -( 1 << 24 ) ) ) { /* switching from music to unclear */ dec = 1; } - ELSE IF( EQ_16( hSpMusClas->past_dec[0], 1 ) && LT_16( hSpMusClas->wdlp_0_95_sp_fx, -640 ) ) + ELSE IF( EQ_16( hSpMusClas->past_dec[0], 1 ) && LT_32( hSpMusClas->wdlp_0_95_sp_32fx, -( 41943040 /*2.5*2^24*/ ) ) ) { /* switching from unclear to speech */ dec = 0; diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index a3e22d12d..699ec8325 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -873,6 +873,7 @@ typedef struct sp_mus_clas_structure Word16 wrise_fx; float wdlp_0_95_sp; Word16 wdlp_0_95_sp_fx; + Word32 wdlp_0_95_sp_32fx; float wdlp_xtalk; Word32 wdlp_xtalk_fx; int16_t sp_mus_state; diff --git a/lib_enc/swb_pre_proc.c b/lib_enc/swb_pre_proc.c index d0031ba47..f75269c2d 100644 --- a/lib_enc/swb_pre_proc.c +++ b/lib_enc/swb_pre_proc.c @@ -987,7 +987,7 @@ void swb_pre_proc_ivas_fx( return; } -#endif +#else void swb_pre_proc( Encoder_State *st, /* i/o: encoder state structure */ float *new_swb_speech, /* o : original input signal at 32kHz */ @@ -1471,3 +1471,4 @@ void swb_pre_proc( return; } +#endif \ No newline at end of file -- GitLab From a23491adc6ab4b1cb324c13cf4dd5dfa603d41b8 Mon Sep 17 00:00:00 2001 From: 
Sandesh Venkatesh Date: Thu, 3 Oct 2024 12:27:56 +0530 Subject: [PATCH 2/2] Clang formatting changes --- lib_enc/ivas_stereo_switching_enc.c | 2 +- lib_enc/swb_pre_proc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_enc/ivas_stereo_switching_enc.c b/lib_enc/ivas_stereo_switching_enc.c index deb108fae..c8cabd743 100644 --- a/lib_enc/ivas_stereo_switching_enc.c +++ b/lib_enc/ivas_stereo_switching_enc.c @@ -370,7 +370,7 @@ ivas_error stereo_memory_enc_fx( return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for Stereo ICBWE \n" ) ); } #ifdef IVAS_FLOAT_FIXED - stereo_icBWE_init_enc( hCPE->hStereoICBWE ); //To be removed + stereo_icBWE_init_enc( hCPE->hStereoICBWE ); // To be removed #endif stereo_icBWE_init_enc_fx( hCPE->hStereoICBWE ); } diff --git a/lib_enc/swb_pre_proc.c b/lib_enc/swb_pre_proc.c index f75269c2d..30f627c40 100644 --- a/lib_enc/swb_pre_proc.c +++ b/lib_enc/swb_pre_proc.c @@ -1471,4 +1471,4 @@ void swb_pre_proc( return; } -#endif \ No newline at end of file +#endif -- GitLab