Loading lib_enc/ivas_core_pre_proc_front_fx.c +1 −1 Original line number Diff line number Diff line Loading @@ -1425,7 +1425,7 @@ ivas_error pre_proc_front_ivas_fx( * TC frame selection *-----------------------------------------------------------------*/ st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ st->clas = signal_clas_ivas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ move16(); select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD ); Loading lib_enc/prot_fx_enc.h +9 −0 Original line number Diff line number Diff line Loading @@ -570,6 +570,15 @@ Word16 signal_clas_fx( /* o : classification for current Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); Word16 signal_clas_ivas_fx( /* o : classification for current frames */ Encoder_State *st, /* i/o: encoder state structure */ const Word16 *speech, /* i : pointer to speech signal for E computation */ const Word32 *ee, /* i : lf/hf E ration for 2 half-frames */ const Word16 relE, /* i : frame relative E to the long term average */ const Word16 L_look, /* i : look-ahead */ Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); void speech_music_classif_fx( Encoder_State *st, /* i/o: state structure */ const Word16 *new_inp, /* i : new input signal */ Loading lib_enc/sig_clas_fx.c +231 −0 Original line number Diff line number Diff line Loading @@ -50,6 +50,236 @@ Word16 signal_clas_fx( /* o : classification for current const Word16 L_look, /* i : look-ahead */ Word16 *clas_mod /* o : class flag for NOOP detection */ ) { Word32 Ltmp; Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1; Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee; Word16 tmp16, tmpS; const Word16 *pt1; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; move32(); #endif /*----------------------------------------------------------------* * Calculate average voicing * Calculate average spectral 
tilt * Calculate zero-crossing rate * Calculate pitch stability *----------------------------------------------------------------*/ /* average voicing on second half-frame and look-ahead */ Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */ mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); /* average spectral tilt in dB */ lo = L_Extract_lc( ee[0], &hi ); lo2 = L_Extract_lc( ee[1], &hi2 ); Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */ test(); test(); IF( LT_32( Ltmp, 2048 ) ) { een = 0; move16(); } ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 ) { een = 512; move16(); } ELSE { /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ exp_ee = norm_l( Ltmp ); frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) ); exp_ee = sub( 30 - 11, exp_ee ); Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */ een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ een = mac_r( C_EE_FX, een, K_EE_FX ); } /* compute zero crossing rate */ pt1 = speech + sub( L_look, 1 ); tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */ Ltmp = L_deposit_l( 0 ); FOR( i = 0; i < L_FRAME; i++ ) { tmp16 = add( 1, tmpS ); pt1++; tmpS = shr( *pt1, 15 ); /* pt1 >=0 ---> 0 OTHERWISE -1 */ Ltmp = L_msu0( Ltmp, tmpS, tmp16 ); } zc = extract_l( Ltmp ); /* compute pitch stability */ pc = add( abs_s( sub( st->pitch[1], st->pitch[0] ) ), abs_s( sub( st->pitch[2], st->pitch[1] ) ) ); st->tdm_pc = pc; move16(); /*-----------------------------------------------------------------* * Transform parameters to the range <0:1> * Compute the merit function *-----------------------------------------------------------------*/ /* corn = K_COR * mean_voi2 + C_COR */ Ltmp = L_mult( C_COR_FX, 32767 ); corn = round_fx( L_shl( L_mac( Ltmp, mean_voi2, K_COR_FX ), -4 ) ); /*Q13+Q13*Q15 =>Q13->Q9*/ /* Limit [0, 1] */ corn = s_max( corn, 0 ); corn = s_min( corn, 512 ); Ltmp = L_mult( C_ZC_FX, 4 ); /*Q13*Q2 -> Q16*/ zcn = round_fx( L_shl( L_mac( Ltmp, zc, K_ZC_FX ), 
16 - 7 ) ); /*Q0*Q15 + Q16*/ /* Limit [0, 1] */ zcn = s_max( zcn, 0 ); zcn = s_min( zcn, 512 ); Ltmp = L_mult( C_RELE_FX, 256 ); /*Q15*Q8 ->Q24*/ relEn = round_fx( L_shl( L_mac( Ltmp, relE, K_RELE_FX ), 1 ) ); /*relE in Q8 but relEn in Q9*/ /* Limit [0.5, 1] */ relEn = s_max( relEn, 256 ); relEn = s_min( relEn, 512 ); Ltmp = L_mult( C_PC_FX, 2 ); /*Q14*Q1 -> Q16*/ pcn = round_fx( L_shl( L_mac( Ltmp, pc, K_PC_FX ), 16 - 7 ) ); /*Q16 + Q0*Q15*/ /* Limit [0, 1] */ pcn = s_max( pcn, 0 ); pcn = s_min( pcn, 512 ); Ltmp = L_mult( een, 10923 ); Ltmp = L_mac( Ltmp, corn, 21845 ); Ltmp = L_mac( Ltmp, zcn, 10923 ); Ltmp = L_mac( Ltmp, relEn, 10923 ); Ltmp = L_mac( Ltmp, pcn, 10923 ); fmerit1 = round_fx_o( L_shl_o( Ltmp, 16 - 10 - 1, &Overflow ), &Overflow ); /* fmerit1 ->Q15 */ /*-----------------------------------------------------------------* * FEC classification *-----------------------------------------------------------------*/ st->fmerit_dt = sub( st->prev_fmerit, fmerit1 ); /*Q15*/ move16(); st->prev_fmerit = fmerit1; move16(); /* FEC classification */ test(); test(); IF( st->localVAD == 0 || EQ_16( st->coder_type, UNVOICED ) || LT_16( relE, -1536 ) ) { clas = UNVOICED_CLAS; *clas_mod = clas; move16(); move16(); } ELSE { SWITCH( st->last_clas ) { case VOICED_CLAS: case ONSET: case VOICED_TRANSITION: IF( LT_16( fmerit1, 16056 ) ) /*0.49f*/ { clas = UNVOICED_CLAS; move16(); } ELSE IF( LT_16( fmerit1, 21626 ) ) /*0.66*/ { clas = VOICED_TRANSITION; move16(); } ELSE { clas = VOICED_CLAS; move16(); } IF( LT_16( fmerit1, 14745 /* 0.45f*/ ) ) { *clas_mod = UNVOICED_CLAS; move16(); } ELSE IF( LT_16( fmerit1, 21626 /* 0.66f*/ ) ) { *clas_mod = VOICED_TRANSITION; move16(); } ELSE { *clas_mod = VOICED_CLAS; move16(); } BREAK; case UNVOICED_CLAS: case UNVOICED_TRANSITION: IF( GT_16( fmerit1, 20643 ) ) /*0.63*/ { clas = ONSET; move16(); } ELSE IF( GT_16( fmerit1, 19169 ) ) /*0.585*/ { clas = UNVOICED_TRANSITION; move16(); } ELSE { clas = UNVOICED_CLAS; move16(); } *clas_mod = clas; 
move16(); BREAK; default: clas = UNVOICED_CLAS; *clas_mod = clas; move16(); move16(); BREAK; } } /* Onset classification */ /* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */ /* tc_cnt == 0: UC frame */ /* tc_cnt == 1: onset/transition frame, coded by GC coder type */ /* tc_cnt == 2: frame after onset/transition frame, coded by TC coder type */ if ( clas == 0 ) { st->tc_cnt = 0; move16(); } test(); IF( GE_16( clas, VOICED_TRANSITION ) && st->tc_cnt >= 0 ) { st->tc_cnt = add( st->tc_cnt, 1 ); move16(); } if ( GT_16( st->tc_cnt, 2 ) ) { st->tc_cnt = -1; move16(); } return clas; } Word16 signal_clas_ivas_fx( /* o : classification for current frames */ Encoder_State *st, /* i/o: encoder state structure */ const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ const Word32 *ee, /* i : lf/hf E ration for 2 half-frames in Q6 */ const Word16 relE, /* i : frame relative E to the long term average in Q8 */ const Word16 L_look, /* i : look-ahead */ Word16 *clas_mod /* o : class flag for NOOP detection */ ) { Word32 Ltmp; Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1; Loading Loading @@ -273,6 +503,7 @@ Word16 signal_clas_fx( /* o : classification for current } return clas; } /*-------------------------------------------------------------------* * select_TC_fx() * Loading Loading
lib_enc/ivas_core_pre_proc_front_fx.c +1 −1 Original line number Diff line number Diff line Loading @@ -1425,7 +1425,7 @@ ivas_error pre_proc_front_ivas_fx( * TC frame selection *-----------------------------------------------------------------*/ st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ st->clas = signal_clas_ivas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ move16(); select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD ); Loading
lib_enc/prot_fx_enc.h +9 −0 Original line number Diff line number Diff line Loading @@ -570,6 +570,15 @@ Word16 signal_clas_fx( /* o : classification for current Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); Word16 signal_clas_ivas_fx( /* o : classification for current frames, Q0 */ Encoder_State *st, /* i/o: encoder state structure */ const Word16 *speech, /* i : pointer to speech signal for E computation, Qx */ const Word32 *ee, /* i : lf/hf E ratio for 2 half-frames, Q6 */ const Word16 relE, /* i : frame relative E to the long term average, Q8 */ const Word16 L_look, /* i : look-ahead */ Word16 *uc_clas /* o : temporary classification used in music/speech class (named clas_mod in the definition: class flag for NOOP detection) */ ); void speech_music_classif_fx( Encoder_State *st, /* i/o: state structure */ const Word16 *new_inp, /* i : new input signal */ Loading
lib_enc/sig_clas_fx.c +231 −0 Original line number Diff line number Diff line Loading @@ -50,6 +50,236 @@ Word16 signal_clas_fx( /* o : classification for current const Word16 L_look, /* i : look-ahead */ Word16 *clas_mod /* o : class flag for NOOP detection */ ) { Word32 Ltmp; Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1; Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee; Word16 tmp16, tmpS; const Word16 *pt1; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; move32(); #endif /*----------------------------------------------------------------* * Calculate average voicing * Calculate average spectral tilt * Calculate zero-crossing rate * Calculate pitch stability *----------------------------------------------------------------*/ /* average voicing on second half-frame and look-ahead */ Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */ mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); /* average spectral tilt in dB */ lo = L_Extract_lc( ee[0], &hi ); lo2 = L_Extract_lc( ee[1], &hi2 ); Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */ test(); test(); IF( LT_32( Ltmp, 2048 ) ) { een = 0; move16(); } ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 ) { een = 512; move16(); } ELSE { /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ exp_ee = norm_l( Ltmp ); frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) ); exp_ee = sub( 30 - 11, exp_ee ); Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */ een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ een = mac_r( C_EE_FX, een, K_EE_FX ); } /* compute zero crossing rate */ pt1 = speech + sub( L_look, 1 ); tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */ Ltmp = L_deposit_l( 0 ); FOR( i = 0; i < L_FRAME; i++ ) { tmp16 = add( 1, tmpS ); pt1++; tmpS = shr( *pt1, 15 ); /* pt1 >=0 ---> 0 OTHERWISE -1 */ Ltmp = L_msu0( Ltmp, tmpS, tmp16 ); } zc = extract_l( Ltmp ); /* compute pitch stability */ pc = add( abs_s( sub( st->pitch[1], 
st->pitch[0] ) ), abs_s( sub( st->pitch[2], st->pitch[1] ) ) ); st->tdm_pc = pc; move16(); /*-----------------------------------------------------------------* * Transform parameters to the range <0:1> * Compute the merit function *-----------------------------------------------------------------*/ /* corn = K_COR * mean_voi2 + C_COR */ Ltmp = L_mult( C_COR_FX, 32767 ); corn = round_fx( L_shl( L_mac( Ltmp, mean_voi2, K_COR_FX ), -4 ) ); /*Q13+Q13*Q15 =>Q13->Q9*/ /* Limit [0, 1] */ corn = s_max( corn, 0 ); corn = s_min( corn, 512 ); Ltmp = L_mult( C_ZC_FX, 4 ); /*Q13*Q2 -> Q16*/ zcn = round_fx( L_shl( L_mac( Ltmp, zc, K_ZC_FX ), 16 - 7 ) ); /*Q0*Q15 + Q16*/ /* Limit [0, 1] */ zcn = s_max( zcn, 0 ); zcn = s_min( zcn, 512 ); Ltmp = L_mult( C_RELE_FX, 256 ); /*Q15*Q8 ->Q24*/ relEn = round_fx( L_shl( L_mac( Ltmp, relE, K_RELE_FX ), 1 ) ); /*relE in Q8 but relEn in Q9*/ /* Limit [0.5, 1] */ relEn = s_max( relEn, 256 ); relEn = s_min( relEn, 512 ); Ltmp = L_mult( C_PC_FX, 2 ); /*Q14*Q1 -> Q16*/ pcn = round_fx( L_shl( L_mac( Ltmp, pc, K_PC_FX ), 16 - 7 ) ); /*Q16 + Q0*Q15*/ /* Limit [0, 1] */ pcn = s_max( pcn, 0 ); pcn = s_min( pcn, 512 ); Ltmp = L_mult( een, 10923 ); Ltmp = L_mac( Ltmp, corn, 21845 ); Ltmp = L_mac( Ltmp, zcn, 10923 ); Ltmp = L_mac( Ltmp, relEn, 10923 ); Ltmp = L_mac( Ltmp, pcn, 10923 ); fmerit1 = round_fx_o( L_shl_o( Ltmp, 16 - 10 - 1, &Overflow ), &Overflow ); /* fmerit1 ->Q15 */ /*-----------------------------------------------------------------* * FEC classification *-----------------------------------------------------------------*/ st->fmerit_dt = sub( st->prev_fmerit, fmerit1 ); /*Q15*/ move16(); st->prev_fmerit = fmerit1; move16(); /* FEC classification */ test(); test(); IF( st->localVAD == 0 || EQ_16( st->coder_type, UNVOICED ) || LT_16( relE, -1536 ) ) { clas = UNVOICED_CLAS; *clas_mod = clas; move16(); move16(); } ELSE { SWITCH( st->last_clas ) { case VOICED_CLAS: case ONSET: case VOICED_TRANSITION: IF( LT_16( fmerit1, 16056 ) ) /*0.49f*/ { 
clas = UNVOICED_CLAS; move16(); } ELSE IF( LT_16( fmerit1, 21626 ) ) /*0.66*/ { clas = VOICED_TRANSITION; move16(); } ELSE { clas = VOICED_CLAS; move16(); } IF( LT_16( fmerit1, 14745 /* 0.45f*/ ) ) { *clas_mod = UNVOICED_CLAS; move16(); } ELSE IF( LT_16( fmerit1, 21626 /* 0.66f*/ ) ) { *clas_mod = VOICED_TRANSITION; move16(); } ELSE { *clas_mod = VOICED_CLAS; move16(); } BREAK; case UNVOICED_CLAS: case UNVOICED_TRANSITION: IF( GT_16( fmerit1, 20643 ) ) /*0.63*/ { clas = ONSET; move16(); } ELSE IF( GT_16( fmerit1, 19169 ) ) /*0.585*/ { clas = UNVOICED_TRANSITION; move16(); } ELSE { clas = UNVOICED_CLAS; move16(); } *clas_mod = clas; move16(); BREAK; default: clas = UNVOICED_CLAS; *clas_mod = clas; move16(); move16(); BREAK; } } /* Onset classification */ /* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */ /* tc_cnt == 0: UC frame */ /* tc_cnt == 1: onset/transition frame, coded by GC coder type */ /* tc_cnt == 2: frame after onset/transition frame, coded by TC coder type */ if ( clas == 0 ) { st->tc_cnt = 0; move16(); } test(); IF( GE_16( clas, VOICED_TRANSITION ) && st->tc_cnt >= 0 ) { st->tc_cnt = add( st->tc_cnt, 1 ); move16(); } if ( GT_16( st->tc_cnt, 2 ) ) { st->tc_cnt = -1; move16(); } return clas; } Word16 signal_clas_ivas_fx( /* o : classification for current frames */ Encoder_State *st, /* i/o: encoder state structure */ const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ const Word32 *ee, /* i : lf/hf E ration for 2 half-frames in Q6 */ const Word16 relE, /* i : frame relative E to the long term average in Q8 */ const Word16 L_look, /* i : look-ahead */ Word16 *clas_mod /* o : class flag for NOOP detection */ ) { Word32 Ltmp; Word16 mean_voi2, een, corn, zcn, relEn, pcn, fmerit1; Loading Loading @@ -273,6 +503,7 @@ Word16 signal_clas_fx( /* o : classification for current } return clas; } /*-------------------------------------------------------------------* * select_TC_fx() * Loading