diff --git a/lib_com/options.h b/lib_com/options.h index 331530effcc876327cb6f716a1a186d873add088..6afb91c4862bf931c24c173d4a9536dd44dd7f3b 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -90,6 +90,7 @@ #define FIX_2280_REDUCTION_UNNECESSARY_SCALING /* VA: reduction of unnecessary scaling */ #define FIX_2280_REDUCTION_UNNECESSARY_SCALING_NONBE /* VA: reduction of unnecessary scaling, non-BE part */ #define FIX_2403_COMBINE_PITCH_OL /* VA : basop 2403, reusing common code between EVS and IVAS in pitch_ol */ +#define FIX_2404_HARM_SIGNAL_CLAS /* VA: basop-2404, harmonize signal_clas_fx and signal_clas_ivas_fx */ #define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ /* #################### End BE switches ################################## */ diff --git a/lib_enc/init_enc_fx.c b/lib_enc/init_enc_fx.c index 0bb93ddc361b1fc0fef842345c4015a78dff6907..e06b42f68c2816e42502a1219dc602b9649fcccb 100644 --- a/lib_enc/init_enc_fx.c +++ b/lib_enc/init_enc_fx.c @@ -557,6 +557,7 @@ ivas_error init_encoder_fx( #else speech_music_clas_init_fx( st->element_mode, st->hSpMusClas ); #endif + st->sp_aud_decision0 = 0; move16(); st->sp_aud_decision1 = 0; diff --git a/lib_enc/ivas_core_pre_proc_front_fx.c b/lib_enc/ivas_core_pre_proc_front_fx.c index 6e5a73a964c4a560d540ecfa5bbc7a961d4b1c1e..e4576ceaaf52f180e4bd2a06bc5bbbce1197c1ca 100644 --- a/lib_enc/ivas_core_pre_proc_front_fx.c +++ b/lib_enc/ivas_core_pre_proc_front_fx.c @@ -1254,7 +1254,11 @@ void pre_proc_front_ivas_fx( * TC frame selection *-----------------------------------------------------------------*/ +#ifndef FIX_2404_HARM_SIGNAL_CLAS st->clas = signal_clas_ivas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ +#else + st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ +#endif move16(); select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD ); diff --git a/lib_enc/prot_fx_enc.h 
b/lib_enc/prot_fx_enc.h index e0f9c4b536abd8288ad2312c9f30378bb295f1b8..4310137b040c22d4cba43eed431f95a65af86a7d 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -511,7 +511,7 @@ Word16 signal_clas_fx( const Word16 L_look, /* i : look-ahead */ Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); - +#ifndef FIX_2404_HARM_SIGNAL_CLAS /* o : classification for current frames */ Word16 signal_clas_ivas_fx( Encoder_State *st, /* i/o: encoder state structure */ @@ -521,6 +521,7 @@ Word16 signal_clas_ivas_fx( const Word16 L_look, /* i : look-ahead */ Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); +#endif void speech_music_classif_fx( Encoder_State *st, /* i/o: state structure */ @@ -1127,7 +1128,7 @@ void coder_type_modif_fx( void speech_music_clas_init_fx( #ifdef FIX_2405_HARM_SMC_INIT - const Word16 element_mode, /* element mode to differentiate IVAS only init */ + const Word16 element_mode, /* i : element mode */ #endif SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle */ ); diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c index ccc1e38e918d12836208aaf80b8bed6869d526cc..19c17e404f2d9255ec0649673b5fceb0b79e198f 100644 --- a/lib_enc/sig_clas_fx.c +++ b/lib_enc/sig_clas_fx.c @@ -31,9 +31,9 @@ #define K_SNR_FX 3541 /* Q15 .1111 */ #define C_SNR_FX -10921 /* Q15 -0.3333f */ - #define THRES_EEN 514206 /* 251.077 => (10^(1/(K_EE*10))) Q11*/ + /*-------------------------------------------------------------------* * signal_clas_fx() * @@ -41,13 +41,14 @@ * TC frames selection *-------------------------------------------------------------------*/ -Word16 signal_clas_fx( /* o : classification for current frames */ - Encoder_State *st, /* i/o: encoder state structure */ - const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ - const Word32 *ee, /* i : lf/hf E ration for 2 half-frames in Q6 */ - const Word16 relE, /* i : frame relative E to the long 
term average in Q8 */ - const Word16 L_look, /* i : look-ahead */ - Word16 *clas_mod /* o : class flag for NOOP detection */ +/* o : classification for current frames */ +Word16 signal_clas_fx( + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ + const Word32 *ee, /* i : lf/hf E ratio for 2 half-frames in Q6 */ + const Word16 relE, /* i : frame relative E to the long term average in Q8 */ + const Word16 L_look, /* i : look-ahead */ + Word16 *clas_mod /* o : class flag for NOOP detection */ ) { Word32 Ltmp; @@ -55,6 +56,9 @@ Word16 signal_clas_fx( /* o : classification for current Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee; Word16 tmp16, tmpS; const Word16 *pt1; +#ifdef FIX_2404_HARM_SIGNAL_CLAS + Word64 tmp64; +#endif Flag Overflow = 0; move32(); @@ -69,34 +73,61 @@ Word16 signal_clas_fx( /* o : classification for current Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */ mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); - /* average spectral tilt in dB */ - lo = L_Extract_lc( ee[0], &hi ); - lo2 = L_Extract_lc( ee[1], &hi2 ); - Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */ - - test(); - test(); - IF( LT_32( Ltmp, 2048 ) ) - { - een = 0; - move16(); - } - ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 ) +#ifdef FIX_2404_HARM_SIGNAL_CLAS + IF( st->element_mode != EVS_MONO ) { - een = 512; - move16(); + tmp64 = W_mult0_32_32( ee[0], ee[1] ); + exp_ee = W_norm( tmp64 ); + Ltmp = W_extract_h( W_shl( tmp64, exp_ee ) ); // Q = Q6+Q6 + exp_ee - 32 + exp_ee = sub( 31, sub( add( Q12, exp_ee ), 32 ) ); + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Ltmp, exp_ee, ONE_IN_Q31, 0 ), -1 ) ) + { + een = 0; + move16(); + } + ELSE + { + /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ + /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ + Ltmp = BASOP_Util_Log10( Ltmp, exp_ee ); // Q25 + Ltmp = Mpy_32_32( Ltmp, 671088640 /*20.f in Q25*/ ); // Q25 + Q25 -Q31 = Q19 * 0.5 = 
Q20 + een = extract_l( L_shl( Mpy_32_16_1( Ltmp, K_EE_FX ), Q9 - Q20 ) ); // Q9 + een = s_min( s_max( een, 0 ), 512 ); + } } ELSE +#endif { - /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ - /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ - exp_ee = norm_l( Ltmp ); - frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) ); - exp_ee = sub( 30 - 11, exp_ee ); - Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */ - een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ - een = mac_r( C_EE_FX, een, K_EE_FX ); + /* average spectral tilt in dB */ + lo = L_Extract_lc( ee[0], &hi ); + lo2 = L_Extract_lc( ee[1], &hi2 ); + Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */ + + test(); + test(); + IF( LT_32( Ltmp, 2048 ) ) + { + een = 0; + move16(); + } + ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 ) + { + een = 512; + move16(); + } + ELSE + { + /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ + /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ + exp_ee = norm_l( Ltmp ); + frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) ); + exp_ee = sub( 30 - 11, exp_ee ); + Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */ + een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ + een = mac_r( C_EE_FX, een, K_EE_FX ); + } } + /* compute zero crossing rate */ pt1 = speech + sub( L_look, 1 ); tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */ @@ -114,6 +145,7 @@ Word16 signal_clas_fx( /* o : classification for current pc = add( abs_s( sub( st->pitch[1], st->pitch[0] ) ), abs_s( sub( st->pitch[2], st->pitch[1] ) ) ); st->tdm_pc = pc; move16(); + /*-----------------------------------------------------------------* * Transform parameters to the range <0:1> * Compute the merit function @@ -241,8 +273,8 @@ Word16 signal_clas_fx( /* o : classification for current BREAK; } } - /* Onset classification */ + /* Onset classification */ /* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */ /* tc_cnt == 0: UC frame */ /* tc_cnt == 1: onset/transition frame, coded 
by GC coder type */ @@ -266,9 +298,11 @@ Word16 signal_clas_fx( /* o : classification for current st->tc_cnt = -1; move16(); } + return clas; } +#ifndef FIX_2404_HARM_SIGNAL_CLAS Word16 signal_clas_ivas_fx( /* o : classification for current frames */ Encoder_State *st, /* i/o: encoder state structure */ const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ @@ -498,6 +532,7 @@ Word16 signal_clas_ivas_fx( /* o : classification for cur } return clas; } +#endif /*-------------------------------------------------------------------* * select_TC_fx() @@ -519,6 +554,7 @@ void select_TC_fx( * Select TC coder type for appropriate frames which is in general VOICED_TRANSITION, * VOICED_CLAS or ONSET frames following UNVOICED_CLAS frames *---------------------------------------------------------------------*/ + test(); IF( localVAD != 0 && GE_16( tc_cnt, 1 ) ) { @@ -540,21 +576,21 @@ void select_TC_fx( return; } + /*-------------------------------------------------------------------* * coder_type_modif_fx() * * Coder type modification *-------------------------------------------------------------------*/ + void coder_type_modif_fx( Encoder_State *st, /* i/o: encoder state structure */ const Word16 relE /* i : frame relative E to the long term average */ ) { Word16 unmod_coder_type, vbr_generic_ho; - SC_VBR_ENC_HANDLE hSC_VBR = st->hSC_VBR; - IF( st->Opt_SC_VBR ) { vbr_generic_ho = hSC_VBR->vbr_generic_ho; @@ -582,8 +618,8 @@ void coder_type_modif_fx( test(); test(); test(); - if ( ( st->element_mode == 0 && GT_32( st->total_brate, ACELP_9k60 ) && EQ_16( st->coder_type, UNVOICED ) ) || - ( st->element_mode > 0 && GT_32( st->total_brate, MAX_UNVOICED_BRATE ) && EQ_16( st->coder_type, UNVOICED ) ) ) + if ( ( st->element_mode == EVS_MONO && GT_32( st->total_brate, ACELP_9k60 ) && EQ_16( st->coder_type, UNVOICED ) ) || + ( st->element_mode > EVS_MONO && GT_32( st->total_brate, MAX_UNVOICED_BRATE ) && EQ_16( st->coder_type, UNVOICED ) ) ) { st->coder_type = 
GENERIC; move16(); @@ -609,8 +645,7 @@ void coder_type_modif_fx( test(); test(); test(); - if ( st->localVAD == 0 && ( ( - EQ_16( st->coder_type, UNVOICED ) && ( ( st->Opt_SC_VBR == 0 ) || ( ( EQ_16( st->Opt_SC_VBR, 1 ) ) && vbr_generic_ho == 0 && GT_16( st->last_coder_type, UNVOICED ) ) ) ) || + if ( st->localVAD == 0 && ( ( EQ_16( st->coder_type, UNVOICED ) && ( ( st->Opt_SC_VBR == 0 ) || ( ( EQ_16( st->Opt_SC_VBR, 1 ) ) && vbr_generic_ho == 0 && GT_16( st->last_coder_type, UNVOICED ) ) ) ) || EQ_16( st->coder_type, TRANSITION ) || EQ_16( st->coder_type, VOICED ) ) ) @@ -657,7 +692,7 @@ void coder_type_modif_fx( } } - IF( st->element_mode == 0 ) + IF( st->element_mode == EVS_MONO ) { /* At higher rates and with 16kHz core, allow only GC and TC coder type */ test(); diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index 6edaa31a35f6f3209d21e291fe6767fc55376280..9db4b6b042077b0d2bd22d0098e95ae72354eefa 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -36,6 +36,18 @@ #define THR_MASS_STEP_UP_FX 41943 /* 0.01f in Q22 */ #define THR_MASS_STEP_DN_FX 83886 /* 0.02f in Q22 */ +// Q18 +static Word32 log_weights_speech_compute[N_SMC_MIXTURES] = { + -578045, -483403, -473370, -468152, -379470, -473234 +}; +static Word32 log_weights_music_compute[N_SMC_MIXTURES] = { + -486797, -522830, -315523, -429999, -775981, -477255 +}; +static Word32 log_weights_noise_compute[N_SMC_MIXTURES] = { + -439941, -576743, -269243, -645452, -529228, -542196 +}; + + /*---------------------------------------------------------------------* * Local functions *---------------------------------------------------------------------*/ @@ -55,21 +67,14 @@ static Word16 attack_det_fx( const Word16 *inp, const Word16 Qx, const Word16 la static void order_spectrum_fx( Word16 *vec, Word16 len ); static void detect_sparseness_fx( Encoder_State *st_fx, const Word16 localVAD_HE_SAD, const Word16 voi_fv ); -// Q18 -Word32 
log_weights_speech_compute[N_SMC_MIXTURES] = { - -578045, -483403, -473370, -468152, -379470, -473234 -}; -Word32 log_weights_music_compute[N_SMC_MIXTURES] = { - -486797, -522830, -315523, -429999, -775981, -477255 -}; -Word32 log_weights_noise_compute[N_SMC_MIXTURES] = { - -439941, -576743, -269243, -645452, -529228, -542196 -}; + + /*---------------------------------------------------------------------* * speech_music_clas_init_fx() * * Initialization of speech/music classifier *---------------------------------------------------------------------*/ + #ifndef FIX_2405_HARM_SMC_INIT void speech_music_clas_init_fx( SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle */ @@ -225,9 +230,9 @@ void speech_music_clas_init_fx( void speech_music_clas_init_ivas_fx( #else void speech_music_clas_init_fx( - const Word16 element_mode, /* element mode to differentiate IVAS only init */ + const Word16 element_mode, /* i : element mode */ #endif - SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle */ + SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle */ ) { @@ -451,6 +456,7 @@ void speech_music_clas_init_fx( return; } + /*---------------------------------------------------------------------* * speech_music_classif() * @@ -495,8 +501,6 @@ void speech_music_classif_fx( test(); IF( EQ_16( st->codec_mode, MODE1 ) || EQ_32( st->sr_core, INT_FS_12k8 ) ) { - - /* Improvement of the 1st stage decision on mixed/music content */ test(); IF( st->Opt_SC_VBR == 0 && NE_32( st->total_brate, ACELP_24k40 ) ) @@ -526,7 +530,6 @@ void speech_music_classif_fx( } } - /* Context-based improvement of 1st and 2nd stage decision on stable tonal signals */ test(); IF( st->Opt_SC_VBR == 0 && NE_32( st->total_brate, ACELP_24k40 ) ) @@ -599,26 +602,28 @@ void speech_music_classif_fx( return; } + /*---------------------------------------------------------------------* * sp_mus_classif_gmm_fx() * * Speech/music classification based on GMM model 
*---------------------------------------------------------------------*/ -static Word16 sp_mus_classif_gmm_fx( /* o : decision flag (1-music, 0-speech or noise) */ - Encoder_State *st_fx, /* i/o: state structure */ - const Word16 localVAD_HE_SAD, /* i : local VAD HE flag */ - const Word16 lsp_new[M], /* i : LSPs in current frame Q15 */ - const Word16 cor_map_sum, /* i : correlation map sum (from multi-harmonic anal.)Q8 */ - const Word32 epsP[M + 1], /* i : LP prediciton error Q_esp */ - const Word32 PS[], /* i : energy spectrum Q_new+Qscale-2 */ - Word16 non_sta, /* i : unbound non-stationarity for sp/mus classifier */ - Word16 relE, /* i : relative frame energy */ - Word16 *voi_fv, /* o : scaled voicing feature */ - Word16 *cor_map_sum_fv, /* o : scaled correlation map feature */ - Word16 *LPCErr, /* o : scaled LP prediction error feature */ - Word16 Q_esp, /* i : scaling of epsP */ - Word16 *high_lpn_flag_ptr /* o : noise log prob flag for NOISE_EST */ +/* o : decision flag (1-music, 0-speech or noise) */ +static Word16 sp_mus_classif_gmm_fx( + Encoder_State *st_fx, /* i/o: state structure */ + const Word16 localVAD_HE_SAD, /* i : local VAD HE flag */ + const Word16 lsp_new[M], /* i : LSPs in current frame Q15 */ + const Word16 cor_map_sum, /* i : correlation map sum (from multi-harmonic anal.)Q8 */ + const Word32 epsP[M + 1], /* i : LP prediction error Q_esp */ + const Word32 PS[], /* i : energy spectrum Q_new+Qscale-2 */ + Word16 non_sta, /* i : unbound non-stationarity for sp/mus classifier */ + Word16 relE, /* i : relative frame energy */ + Word16 *voi_fv, /* o : scaled voicing feature */ + Word16 *cor_map_sum_fv, /* o : scaled correlation map feature */ + Word16 *LPCErr, /* o : scaled LP prediction error feature */ + Word16 Q_esp, /* i : scaling of epsP */ + Word16 *high_lpn_flag_ptr /* o : noise log prob flag for NOISE_EST */ ) { Word16 i, k, p, dec, vad; @@ -932,7 +937,6 @@ static Word16 sp_mus_classif_gmm_fx( /* o : decis *LPCErr = FV[9]; move16(); - 
/*------------------------------------------------------------------* * Calculation of posterior probability * Log-probability @@ -943,7 +947,6 @@ static Word16 sp_mus_classif_gmm_fx( /* o : decis /* pyn = 1e-5f;*/ max_n = L_add( MIN_32, 0 ); - FOR( k = 0; k < N_MIXTURES; k++ ) { /* for each mixture, calculate the probability of speech or noise and the probability of music */ @@ -960,7 +963,6 @@ static Word16 sp_mus_classif_gmm_fx( /* o : decis py_s = L_add( lvm_speech_fx[k], L_tmp ); /*Q10 */ max_s = L_max( py_s, max_s ); - /* pys += (float)exp(py); */ /* inactive frames - calculate the probability of noise */ @@ -1016,7 +1018,6 @@ static Word16 sp_mus_classif_gmm_fx( /* o : decis move16(); } - IF( !vad ) { /* increase log-probability of noise */ @@ -1157,7 +1158,6 @@ static Word16 sp_mus_classif_gmm_fx( /* o : decis /*wrelE = 1.0f + relE/15;*/ wrelE = add( 2048, mult_r( relE, 17476 ) ); /* 1/15 in Q18 -> 17476 result in Q11 */ - wrelE = s_min( wrelE, 2048 ); wrelE = s_max( wrelE, 20 ); @@ -1259,7 +1259,6 @@ static Word16 sp_mus_classif_gmm_fx( /* o : decis } } - /*------------------------------------------------------------------* * Updates *------------------------------------------------------------------*/ @@ -1444,13 +1443,14 @@ static void var_cor_calc_fx( * Attack detection *---------------------------------------------------------------------*/ -static Word16 attack_det_fx( /* o : attack flag */ - const Word16 *inp, /* i : input signal */ - const Word16 Qx, - const Word16 last_clas, /* i : last signal clas */ - const Word16 localVAD, /* i : local VAD flag */ - const Word16 coder_type, /* i : coder type */ - const Word32 total_brate /* i : total bitrate */ +/* o : attack flag */ +static Word16 attack_det_fx( + const Word16 *inp, /* i : input signal */ + const Word16 Qx, + const Word16 last_clas, /* i : last signal clas */ + const Word16 localVAD, /* i : local VAD flag */ + const Word16 coder_type, /* i : coder type */ + const Word32 total_brate /* i : total 
bitrate */ ) { Word16 i, j, tmp, tmp1, attack, exp1; @@ -1566,11 +1566,13 @@ static Word16 attack_det_fx( /* o : attack flag return attack; } -/* -------------------------------------------------------------------- - * + +/* --------------------------------------------------------------------- * *ivas_smc_gmm() * *1st stage of the speech / music classification(based on the GMM model) - * -------------------------------------------------------------------- - */ + * --------------------------------------------------------------------- */ + /*! r: S/M decision (0=speech or noise,1=unclear,2=music) */ Word16 ivas_smc_gmm_fx( Encoder_State *st, /* i/o: state structure */ @@ -1618,12 +1620,9 @@ Word16 ivas_smc_gmm_fx( Word16 temp16; Word16 dotp_exp = 0; move16(); - /*------------------------------------------------------------------* - * Initialization - *------------------------------------------------------------------*/ - SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; Word32 temp_sqrt, temp_acos; + /*------------------------------------------------------------------* * State machine (sp_mus_state: -8 = INACTIVE, -7:-1 = UNSTABLE, 0:7 = ENTRY, 8 = STABLE ) *------------------------------------------------------------------*/ @@ -1853,7 +1852,6 @@ Word16 ivas_smc_gmm_fx( temp16 = lsp_new_fx[6]; move16(); - temp32 = L_sub( ONE_IN_Q30, L_mult0( temp16, temp16 ) ); // Q30 temp_sqrt = Sqrt32( temp32, &temp_exp ); temp_acos = BASOP_util_atan2( temp_sqrt, L_deposit_h( temp16 ), temp_exp ); @@ -2035,6 +2033,7 @@ Word16 ivas_smc_gmm_fx( /*------------------------------------------------------------------* * Outlier detection based on feature histograms *------------------------------------------------------------------*/ + flag_odv = 0; move16(); IF( localVAD_HE_SAD ) @@ -2074,6 +2073,7 @@ Word16 ivas_smc_gmm_fx( /*------------------------------------------------------------------* * Adaptive short-term mean filter on feature vector 
*------------------------------------------------------------------*/ + Qfact_FV = 20; move16(); pFV_fx = FV_fx; @@ -2117,6 +2117,7 @@ Word16 ivas_smc_gmm_fx( /*------------------------------------------------------------------* * Non-linear power transformation (boxcox) on certain features *------------------------------------------------------------------*/ + pFV_fx = FV_fx; FOR( i = 0; i < N_SMC_FEATURES; i++ ) { @@ -2239,6 +2240,7 @@ Word16 ivas_smc_gmm_fx( * Decision without hangover * Weighted decision *------------------------------------------------------------------*/ + test(); test(); test(); @@ -2318,7 +2320,6 @@ Word16 ivas_smc_gmm_fx( move16(); } - wrise_fx = lin_interp32_fx( L_deposit_h( hSpMusClas->wrise_fx ), 167772160, 2040109466 /* 0.95 in Q31 */, 0, ONE_IN_Q31 /* 1.0f in Q31 */, 1 ); /* Q31 */ /* combine weights into one */ // wght = wrelE * wdrop * wrise; @@ -2501,6 +2502,7 @@ Word16 ivas_smc_gmm_fx( /*------------------------------------------------------------------* * raw S/M decision based on smoothed GMM score *------------------------------------------------------------------*/ + test(); IF( dec == 0 || st->hSpMusClas->wdlp_0_95_sp_32fx <= 0 ) { @@ -2514,6 +2516,7 @@ Word16 ivas_smc_gmm_fx( } move16(); move16(); + /*------------------------------------------------------------------* * Updates *------------------------------------------------------------------*/ @@ -2538,6 +2541,7 @@ Word16 ivas_smc_gmm_fx( return dec; } + /*---------------------------------------------------------------------* * var_cor_calc_ivas_fx() * @@ -2587,20 +2591,21 @@ static void var_cor_calc_ivas_fx( * Attack detection *---------------------------------------------------------------------*/ -static Word16 attack_det_ivas_fx( /* o : attack flag */ - const Word16 *inp, /* i : input signal */ - const Word16 Qx, - const Word16 last_clas, /* i : last signal clas */ - const Word16 localVAD, /* i : local VAD flag */ - const Word16 coder_type, /* i : coder type */ - 
const Word32 total_brate, /* i : total bitrate */ - const Word16 element_mode, /* i : IVAS element mode */ - const Word16 clas, /* i : signal class */ - Word32 finc_prev[], /* i/o: previous finc, (q_finc_prev) */ - Word16 *q_finc_prev, /* i/o: Q of previous finc */ - Word32 *lt_finc, /* i/o: long-term mean finc, (q_lt_finc) */ - Word16 *q_lt_finc, /* i/o: Q of lt_finc */ - Word16 *last_strong_attack /* i/o: last strong attack flag */ +/* o : attack flag */ +static Word16 attack_det_ivas_fx( + const Word16 *inp, /* i : input signal */ + const Word16 Qx, + const Word16 last_clas, /* i : last signal clas */ + const Word16 localVAD, /* i : local VAD flag */ + const Word16 coder_type, /* i : coder type */ + const Word32 total_brate, /* i : total bitrate */ + const Word16 element_mode, /* i : IVAS element mode */ + const Word16 clas, /* i : signal class */ + Word32 finc_prev[], /* i/o: previous finc, (q_finc_prev) */ + Word16 *q_finc_prev, /* i/o: Q of previous finc */ + Word32 *lt_finc, /* i/o: long-term mean finc, (q_lt_finc) */ + Word16 *q_lt_finc, /* i/o: Q of lt_finc */ + Word16 *last_strong_attack /* i/o: last strong attack flag */ ) { Word16 i, j, tmp, tmp1, attack, exp1, etmp_e, etmp2_e, s; @@ -2639,6 +2644,7 @@ static Word16 attack_det_ivas_fx( /* o : attack flag *q_finc_prev = shl( Qx, 1 ); // Q of finc move16(); q_diff = sub( *q_finc_prev, *q_lt_finc ); + test(); IF( EQ_16( localVAD, 1 ) && EQ_16( coder_type, GENERIC ) ) { @@ -2665,7 +2671,6 @@ static Word16 attack_det_ivas_fx( /* o : attack flag move32(); etmp_e = sub( 31, add( *q_finc_prev, sub( s, add( exp1, 18 ) ) ) ); - tmp1 = sub( ATT_NSEG, attack ); exp1 = norm_s( tmp1 ); tmp = div_s( shl( 1, sub( 14, exp1 ) ), tmp1 ); /*Q(29-exp1) */ @@ -2802,6 +2807,7 @@ static Word16 attack_det_ivas_fx( /* o : attack flag return attack; } + /*---------------------------------------------------------------------* * tonal_det() * @@ -2908,12 +2914,12 @@ void ivas_smc_mode_selection_fx( Encoder_State *st, /* i/o: 
encoder state structure */ const Word32 element_brate, /* i : element bitrate */ Word16 smc_dec, /* i : raw decision of the 1st stage classifier*/ - const Word16 relE, /* i : relative frame energy, Q8 */ - const Word16 Etot, /* i : total frame energy, Q8 */ + const Word16 relE, /* i : relative frame energy, Q8 */ + const Word16 Etot, /* i : total frame energy, Q8 */ Word16 *attack_flag, /* i/o: attack flag (GSC or TC) */ const Word16 *inp, /* i : input signal */ - const Word16 Q_new, /* i : Q of input signal */ - const Word16 S_map[], /* i : short-term correlation map, Q7 */ + const Word16 Q_new, /* i : Q of input signal */ + const Word16 S_map[], /* i : short-term correlation map, Q7 */ const Word16 flag_spitch /* i : flag to indicate very short stable pitch*/ ) { @@ -3137,8 +3143,8 @@ void ivas_smc_mode_selection_fx( st->sp_aud_decision2 = 1; } } -#endif +#endif /* set GSC noisy speech flag on unvoiced SWB segments */ st->GSC_noisy_speech = 0; move16(); @@ -3203,6 +3209,7 @@ void ivas_smc_mode_selection_fx( return; } + /*---------------------------------------------------------------------* * mode_decision_fx() * @@ -3239,7 +3246,6 @@ static Word16 mode_decision_fx( Word16 M_flux10; SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; - mode = *dec_mov > 16384; logic16(); move16(); @@ -3480,6 +3486,7 @@ static Word16 mode_decision_fx( return ( mode ); } + /*---------------------------------------------------------------------* * tonal_dist_fx() * @@ -3500,7 +3507,6 @@ static void tonal_dist_fx( Word16 Ntonal2; Word16 Ntonal_lf; - /* find number of tonals, number of tonals at low-band, spectral peakiness at high-band */ pk = L_deposit_l( 0 ); @@ -3564,6 +3570,7 @@ static void tonal_dist_fx( return; } + /*---------------------------------------------------------------------* * flux_fx() * @@ -3649,6 +3656,7 @@ static void flux_fx( return; } + /*---------------------------------------------------------------------* * spec_analysis_fx() * @@ -3765,8 +3773,17 @@ static 
void spec_analysis_fx( p2v_map[peak_idx[i]] = p2v[i]; move16(); } + + return; } + +/*---------------------------------------------------------------------* + * music_mixed_classif_improv_fx() + * + * + *---------------------------------------------------------------------*/ + static void music_mixed_classif_improv_fx( Encoder_State *st, /* i : encoder state structure */ const Word16 *new_inp, /* i : new input signal */ @@ -4139,9 +4156,9 @@ static void music_mixed_classif_improv_fx( static void tonal_context_improv_fx( Encoder_State *st_fx, /* i/o: Encoder state structure */ const Word32 PS[], /* i : energy spectrum */ - const Word16 voi_fv, /* i : scaled voicing feature */ - const Word16 cor_map_sum_fv, /* i : scaled correlation map feature */ - const Word16 LPCErr, /* i : scaled LP prediction error feature */ + const Word16 voi_fv, /* i : scaled voicing feature */ + const Word16 cor_map_sum_fv, /* i : scaled correlation map feature */ + const Word16 LPCErr, /* i : scaled LP prediction error feature */ const Word16 Qx ) { Word16 t2_fx, t3_fx, tL_fx, err_fx, cor_fx, dft_fx; @@ -4504,11 +4521,13 @@ static void tonal_context_improv_fx( return; } + /*----------------------------------------------------------------------------------* * detect_sparseness_fx() * * *----------------------------------------------------------------------------------*/ + static void detect_sparseness_fx( Encoder_State *st_fx, /* i/o: encoder state structure */ const Word16 localVAD_HE_SAD, /* i : HE-SAD flag without hangover */ @@ -4866,6 +4885,7 @@ static void detect_sparseness_fx( * * *---------------------------------------------------------------------*/ + static void order_spectrum_fx( Word16 *vec, Word16 len ) @@ -4928,4 +4948,6 @@ static void order_spectrum_fx( vec[imin] = tmp; move16(); } + + return; }