diff --git a/lib_enc/ivas_core_pre_proc_front_fx.c b/lib_enc/ivas_core_pre_proc_front_fx.c index 3d7d87bc201f5819e1b2f190d42a6c29dd9c5f54..4a8c4edd269883639042cdbfa76d0309a9d3f6f9 100644 --- a/lib_enc/ivas_core_pre_proc_front_fx.c +++ b/lib_enc/ivas_core_pre_proc_front_fx.c @@ -1425,7 +1425,7 @@ ivas_error pre_proc_front_ivas_fx( * TC frame selection *-----------------------------------------------------------------*/ - st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ + st->clas = signal_clas_ivas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ move16(); select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD ); diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 4237aa9c3ab398dd1fa23058653550ead36bf8c9..2aa5df5c4ce9028dfe34cd0cc805c9e4d10e6431 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -570,6 +570,15 @@ Word16 signal_clas_fx( /* o : classification for current Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); +Word16 signal_clas_ivas_fx( /* o : classification for current frames */ + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 *speech, /* i : pointer to speech signal for E computation */ + const Word32 *ee, /* i : lf/hf E ration for 2 half-frames */ + const Word16 relE, /* i : frame relative E to the long term average */ + const Word16 L_look, /* i : look-ahead */ + Word16 *uc_clas /* o : temporary classification used in music/speech class*/ +); + void speech_music_classif_fx( Encoder_State *st, /* i/o: state structure */ const Word16 *new_inp, /* i : new input signal */ diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c index cf5d31d22001c277a70040332e8c73383e4be02a..beb97514a0d139e512f3936143a461e75d89de6e 100644 --- a/lib_enc/sig_clas_fx.c +++ b/lib_enc/sig_clas_fx.c @@ -100,7 +100,238 @@ Word16 signal_clas_fx( /* o : classification for current een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ een = mac_r( C_EE_FX, een, K_EE_FX ); } + /* compute zero crossing rate */ + pt1 = speech + sub( L_look, 1 ); + tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */ + Ltmp = L_deposit_l( 0 ); + FOR( i = 0; i < L_FRAME; i++ ) + { + tmp16 = add( 1, tmpS ); + pt1++; + tmpS = shr( *pt1, 15 ); /* pt1 >=0 ---> 0 OTHERWISE -1 */ + Ltmp = L_msu0( Ltmp, tmpS, tmp16 ); + } + zc = extract_l( Ltmp ); + + /* compute pitch stability */ + pc = add( abs_s( sub( st->pitch[1], st->pitch[0] ) ), abs_s( sub( st->pitch[2], st->pitch[1] ) ) ); + st->tdm_pc = pc; + move16(); + /*-----------------------------------------------------------------* + * Transform parameters to the range <0:1> + * Compute the merit function + *-----------------------------------------------------------------*/ + + /* corn = K_COR * mean_voi2 + C_COR */ + Ltmp = L_mult( C_COR_FX, 32767 ); + corn = round_fx( L_shl( L_mac( Ltmp, mean_voi2, K_COR_FX ), -4 ) ); /*Q13+Q13*Q15 =>Q13->Q9*/ + /* Limit [0, 1] */ + corn = s_max( corn, 0 ); + corn = s_min( corn, 512 ); + + Ltmp = L_mult( C_ZC_FX, 4 ); /*Q13*Q2 -> Q16*/ + zcn = round_fx( L_shl( L_mac( Ltmp, zc, K_ZC_FX ), 16 - 7 ) ); /*Q0*Q15 + Q16*/ + /* Limit [0, 1] */ + zcn = s_max( zcn, 0 ); + zcn = s_min( zcn, 512 ); + + Ltmp = L_mult( C_RELE_FX, 256 ); /*Q15*Q8 ->Q24*/ + relEn = round_fx( L_shl( L_mac( Ltmp, relE, K_RELE_FX ), 1 ) ); /*relE in Q8 but relEn in Q9*/ + /* Limit [0.5, 1] */ + relEn = s_max( relEn, 256 ); + relEn = s_min( relEn, 512 ); + + Ltmp = L_mult( C_PC_FX, 2 ); /*Q14*Q1 -> Q16*/ + pcn = round_fx( L_shl( L_mac( Ltmp, pc, K_PC_FX ), 16 - 7 ) ); /*Q16 + Q0*Q15*/ + /* Limit [0, 1] */ + pcn = s_max( pcn, 0 ); + pcn = s_min( pcn, 512 ); + + Ltmp = L_mult( een, 10923 ); + Ltmp = L_mac( Ltmp, corn, 21845 ); + Ltmp = L_mac( Ltmp, zcn, 10923 ); + Ltmp = L_mac( Ltmp, relEn, 10923 ); + Ltmp = L_mac( Ltmp, pcn, 10923 ); + + fmerit1 = round_fx_o( L_shl_o( Ltmp, 16 - 10 - 1, &Overflow ), &Overflow ); /* fmerit1 ->Q15 */ + + /*-----------------------------------------------------------------* + * FEC classification + *-----------------------------------------------------------------*/ + + st->fmerit_dt = sub( st->prev_fmerit, fmerit1 ); /*Q15*/ + move16(); + st->prev_fmerit = fmerit1; + move16(); + /* FEC classification */ + test(); + test(); + IF( st->localVAD == 0 || EQ_16( st->coder_type, UNVOICED ) || LT_16( relE, -1536 ) ) + { + clas = UNVOICED_CLAS; + *clas_mod = clas; + move16(); + move16(); + } + ELSE + { + SWITCH( st->last_clas ) + { + case VOICED_CLAS: + case ONSET: + case VOICED_TRANSITION: + + IF( LT_16( fmerit1, 16056 ) ) /*0.49f*/ + { + clas = UNVOICED_CLAS; + move16(); + } + ELSE IF( LT_16( fmerit1, 21626 ) ) /*0.66*/ + { + clas = VOICED_TRANSITION; + move16(); + } + ELSE + { + clas = VOICED_CLAS; + move16(); + } + IF( LT_16( fmerit1, 14745 /* 0.45f*/ ) ) + { + *clas_mod = UNVOICED_CLAS; + move16(); + } + ELSE IF( LT_16( fmerit1, 21626 /* 0.66f*/ ) ) + { + *clas_mod = VOICED_TRANSITION; + move16(); + } + ELSE + { + *clas_mod = VOICED_CLAS; + move16(); + } + BREAK; + + case UNVOICED_CLAS: + case UNVOICED_TRANSITION: + IF( GT_16( fmerit1, 20643 ) ) /*0.63*/ + { + clas = ONSET; + move16(); + } + ELSE IF( GT_16( fmerit1, 19169 ) ) /*0.585*/ + { + clas = UNVOICED_TRANSITION; + move16(); + } + ELSE + { + clas = UNVOICED_CLAS; + move16(); + } + *clas_mod = clas; + move16(); + + BREAK; + + default: + clas = UNVOICED_CLAS; + *clas_mod = clas; + move16(); + move16(); + BREAK; + } + } + /* Onset classification */ + + /* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */ + /* tc_cnt == 0: UC frame */ + /* tc_cnt == 1: onset/transition frame, coded by GC coder type */ + /* tc_cnt == 2: frame after onset/transition frame, coded by TC coder type */ + + if ( clas == 0 ) + { + st->tc_cnt = 0; + move16(); + } + + test(); + IF( GE_16( clas, VOICED_TRANSITION ) && st->tc_cnt >= 0 ) + { + st->tc_cnt = add( st->tc_cnt, 1 ); + move16(); + } + + if ( GT_16( st->tc_cnt, 2 ) ) + { + st->tc_cnt = -1; + move16(); + } + return clas; +} + +Word16 signal_clas_ivas_fx( /* o : classification for current frames */ + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ + const Word32 *ee, /* i : lf/hf E ration for 2 half-frames in Q6 */ + const Word16 relE, /* i : frame relative E to the long term average in Q8 */ + const Word16 L_look, /* i : look-ahead */ + Word16 *clas_mod /* o : class flag for NOOP detection */ +) +{ + Word32 Ltmp; + Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1; + Word16 i, clas, pc, zc, exp_ee; + Word16 tmp16, tmpS; + const Word16 *pt1; + Word64 tmp64; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; + move32(); +#endif + + /*----------------------------------------------------------------* + * Calculate average voicing + * Calculate average spectral tilt + * Calculate zero-crossing rate + * Calculate pitch stability + *----------------------------------------------------------------*/ + + /* average voicing on second half-frame and look-ahead */ + Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */ + mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); + + /* average spectral tilt in dB */ + tmp64 = W_mult0_32_32( ee[0], ee[1] ); + exp_ee = W_norm( tmp64 ); + Ltmp = W_extract_h( W_shl( tmp64, exp_ee ) ); // Q = Q6+Q6 + exp_ee - 32 + exp_ee = sub( 31, sub( add( Q12, exp_ee ), 32 ) ); + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Ltmp, exp_ee, ONE_IN_Q31, 0 ), -1 ) ) + { + een = 0; + move16(); + } + ELSE + { + /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ + /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ + Ltmp = BASOP_Util_Log10( Ltmp, exp_ee ); // Q25 + Ltmp = Mpy_32_32( Ltmp, 671088640 /*20.f in Q25*/ ); // Q25 + Q25 -Q31 = Q19 * 0.5 = Q20 + een = extract_l( L_shl( Mpy_32_16_1( Ltmp, K_EE_FX ), Q9 - Q20 ) ); // Q9 + + IF( GT_16( een, 512 ) ) + { + een = 512; + move16(); + } + ELSE IF( een < 0 ) + { + een = 0; + move16(); + } + } /* compute zero crossing rate */ pt1 = speech + sub( L_look, 1 ); tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */ @@ -272,6 +503,7 @@ Word16 signal_clas_fx( /* o : classification for current } return clas; } + /*-------------------------------------------------------------------* * select_TC_fx() *