From 73e27ddde49f7482f26e46a0bbfdca9b42123a4a Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Tue, 3 Feb 2026 09:42:49 -0500 Subject: [PATCH 1/6] harmonisation of signal_clas --- lib_com/options.h | 2 +- lib_enc/ivas_core_pre_proc_front_fx.c | 4 ++ lib_enc/prot_fx_enc.h | 4 +- lib_enc/sig_clas_fx.c | 80 +++++++++++++++++++-------- 4 files changed, 63 insertions(+), 27 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 0ea316a82..7e9d5383f 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -109,7 +109,7 @@ #define FIX_2391_INIT_HQ_GENERIC_OFFSET /* FhG/Eri: basop issue 2391: make sure hq_generic_offset is initialized inside hq_hr_dec_fx() */ #define FIX_2397_COPY_AQ_MDCT_CORE_BFI /* FhG: prevent copying of uninit memory in MDCT stereo core if bfi is set */ #define HARMONIZE_TBE /* VA: harmonize core-coder TBE function duplications */ - +#define FIX_2404_HARM_SIGNAL_CLAS /* VA: basop-2404, harmonize signal_clas and signa_ivas_clas */ /* #################### End BE switches ################################## */ /* #################### Start NON-BE switches ############################ */ diff --git a/lib_enc/ivas_core_pre_proc_front_fx.c b/lib_enc/ivas_core_pre_proc_front_fx.c index 8541b3fd9..daf84de78 100644 --- a/lib_enc/ivas_core_pre_proc_front_fx.c +++ b/lib_enc/ivas_core_pre_proc_front_fx.c @@ -1258,7 +1258,11 @@ void pre_proc_front_ivas_fx( * TC frame selection *-----------------------------------------------------------------*/ +#ifndef FIX_2404_HARM_SIGNAL_CLAS st->clas = signal_clas_ivas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ +#else + st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, *relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */ +#endif move16(); select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD ); diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 49c438515..513978304 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -510,7 +510,7 @@ Word16 signal_clas_fx( const Word16 L_look, /* i : look-ahead */ Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); - +#ifndef FIX_2404_HARM_SIGNAL_CLAS /* o : classification for current frames */ Word16 signal_clas_ivas_fx( Encoder_State *st, /* i/o: encoder state structure */ @@ -520,7 +520,7 @@ Word16 signal_clas_ivas_fx( const Word16 L_look, /* i : look-ahead */ Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); - +#endif void speech_music_classif_fx( Encoder_State *st, /* i/o: state structure */ const Word16 *new_inp, /* i : new input signal */ diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c index ccc1e38e9..80136afef 100644 --- a/lib_enc/sig_clas_fx.c +++ b/lib_enc/sig_clas_fx.c @@ -55,6 +55,9 @@ Word16 signal_clas_fx( /* o : classification for current Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee; Word16 tmp16, tmpS; const Word16 *pt1; +#ifdef FIX_2404_HARM_SIGNAL_CLAS + Word64 tmp64; +#endif Flag Overflow = 0; move32(); @@ -69,33 +72,61 @@ Word16 signal_clas_fx( /* o : classification for current Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */ mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); - /* average spectral tilt in dB */ - lo = L_Extract_lc( ee[0], &hi ); - lo2 = L_Extract_lc( ee[1], &hi2 ); - Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */ - test(); - test(); - IF( LT_32( Ltmp, 2048 ) ) +#ifdef FIX_2404_HARM_SIGNAL_CLAS + IF( st->element_mode != EVS_MONO ) { - een = 0; - move16(); - } - ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 ) - { - een = 512; - move16(); + tmp64 = W_mult0_32_32( ee[0], ee[1] ); + exp_ee = W_norm( tmp64 ); + Ltmp = W_extract_h( W_shl( tmp64, exp_ee ) ); // Q = Q6+Q6 + exp_ee - 32 + exp_ee = sub( 31, sub( add( Q12, exp_ee ), 32 ) ); + IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Ltmp, exp_ee, ONE_IN_Q31, 0 ), -1 ) ) + { + een = 0; + move16(); + } + ELSE + { + /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ + /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ + Ltmp = BASOP_Util_Log10( Ltmp, exp_ee ); // Q25 + Ltmp = Mpy_32_32( Ltmp, 671088640 /*20.f in Q25*/ ); // Q25 + Q25 -Q31 = Q19 * 0.5 = Q20 + een = extract_l( L_shl( Mpy_32_16_1( Ltmp, K_EE_FX ), Q9 - Q20 ) ); // Q9 + + een = s_min( s_max( een, 0 ), 512 ); + } } - ELSE +#endif { - /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ - /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ - exp_ee = norm_l( Ltmp ); - frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) ); - exp_ee = sub( 30 - 11, exp_ee ); - Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */ - een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ - een = mac_r( C_EE_FX, een, K_EE_FX ); + + /* average spectral tilt in dB */ + lo = L_Extract_lc( ee[0], &hi ); + lo2 = L_Extract_lc( ee[1], &hi2 ); + Ltmp = L_mult( lo, lo2 ); /* Q5*Q5->Q11 */ + + test(); + test(); + IF( LT_32( Ltmp, 2048 ) ) + { + een = 0; + move16(); + } + ELSE IF( GT_32( Ltmp, THRES_EEN ) || hi > 0 || hi2 > 0 ) + { + een = 512; + move16(); + } + ELSE + { + /* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */ + /* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */ + exp_ee = norm_l( Ltmp ); + frac_ee = Log2_norm_lc( L_shl( Ltmp, exp_ee ) ); + exp_ee = sub( 30 - 11, exp_ee ); + Ltmp = Mpy_32_16( exp_ee, frac_ee, LG10 ); /* Ltmp Q14 */ + een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */ + een = mac_r( C_EE_FX, een, K_EE_FX ); + } } /* compute zero crossing rate */ pt1 = speech + sub( L_look, 1 ); @@ -268,7 +299,7 @@ Word16 signal_clas_fx( /* o : classification for current } return clas; } - +#ifndef FIX_2404_HARM_SIGNAL_CLAS Word16 signal_clas_ivas_fx( /* o : classification for current frames */ Encoder_State *st, /* i/o: encoder state structure */ const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ @@ -498,6 +529,7 @@ Word16 signal_clas_ivas_fx( /* o : classification for cur } return clas; } +#endif /*-------------------------------------------------------------------* * select_TC_fx() -- GitLab From d190c2786a4df4d2592bcc34e50f98017d2a4d8e Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Tue, 3 Feb 2026 09:50:41 -0500 Subject: [PATCH 2/6] fix clang-format --- lib_enc/sig_clas_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c index 80136afef..ee23ce5e7 100644 --- a/lib_enc/sig_clas_fx.c +++ b/lib_enc/sig_clas_fx.c @@ -73,7 +73,7 @@ Word16 signal_clas_fx( /* o : classification for current mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); -#ifdef FIX_2404_HARM_SIGNAL_CLAS +#ifdef FIX_2404_HARM_SIGNAL_CLAS IF( st->element_mode != EVS_MONO ) { tmp64 = W_mult0_32_32( ee[0], ee[1] ); -- GitLab From a05efd71c4440c32389e3b20edb492f6af7d6e27 Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Tue, 3 Feb 2026 12:35:57 -0500 Subject: [PATCH 3/6] add missing ELSE --- lib_enc/sig_clas_fx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c index ee23ce5e7..07ad13389 100644 --- a/lib_enc/sig_clas_fx.c +++ b/lib_enc/sig_clas_fx.c @@ -96,6 +96,7 @@ Word16 signal_clas_fx( /* o : classification for current een = s_min( s_max( een, 0 ), 512 ); } } + ELSE #endif { -- GitLab From 62b319ef613d7681ad836f5020f78eac66e29c3c Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 3 Feb 2026 19:01:14 +0100 Subject: [PATCH 4/6] editorial improvements --- lib_enc/prot_fx_enc.h | 1 + lib_enc/sig_clas_fx.c | 40 +++++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index 513978304..11e86137e 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -521,6 +521,7 @@ Word16 signal_clas_ivas_fx( Word16 *uc_clas /* o : temporary classification used in music/speech class*/ ); #endif + void speech_music_classif_fx( Encoder_State *st, /* i/o: state structure */ const Word16 *new_inp, /* i : new input signal */ diff --git a/lib_enc/sig_clas_fx.c b/lib_enc/sig_clas_fx.c index 07ad13389..19c17e404 100644 --- a/lib_enc/sig_clas_fx.c +++ b/lib_enc/sig_clas_fx.c @@ -31,9 +31,9 @@ #define K_SNR_FX 3541 /* Q15 .1111 */ #define C_SNR_FX -10921 /* Q15 -0.3333f */ - #define THRES_EEN 514206 /* 251.077 => (10^(1/(K_EE*10))) Q11*/ + /*-------------------------------------------------------------------* * signal_clas_fx() * @@ -41,13 +41,14 @@ * TC frames selection *-------------------------------------------------------------------*/ -Word16 signal_clas_fx( /* o : classification for current frames */ - Encoder_State *st, /* i/o: encoder state structure */ - const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ - const Word32 *ee, /* i : lf/hf E ration for 2 half-frames in Q6 */ - const Word16 relE, /* i : frame relative E to the long term average in Q8 */ - const Word16 L_look, /* i : look-ahead */ - Word16 *clas_mod /* o : class flag for NOOP detection */ +/* o : classification for current frames */ +Word16 signal_clas_fx( + Encoder_State *st, /* i/o: encoder state structure */ + const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */ + const Word32 *ee, /* i : lf/hf E ration for 2 half-frames in Q6 */ + const Word16 relE, /* i : frame relative E to the long term average in Q8 */ + const Word16 L_look, /* i : look-ahead */ + Word16 *clas_mod /* o : class flag for NOOP detection */ ) { Word32 Ltmp; @@ -72,7 +73,6 @@ Word16 signal_clas_fx( /* o : classification for current Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */ mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 ); - #ifdef FIX_2404_HARM_SIGNAL_CLAS IF( st->element_mode != EVS_MONO ) { @@ -92,14 +92,12 @@ Word16 signal_clas_fx( /* o : classification for current Ltmp = BASOP_Util_Log10( Ltmp, exp_ee ); // Q25 Ltmp = Mpy_32_32( Ltmp, 671088640 /*20.f in Q25*/ ); // Q25 + Q25 -Q31 = Q19 * 0.5 = Q20 een = extract_l( L_shl( Mpy_32_16_1( Ltmp, K_EE_FX ), Q9 - Q20 ) ); // Q9 - een = s_min( s_max( een, 0 ), 512 ); } } ELSE #endif { - /* average spectral tilt in dB */ lo = L_Extract_lc( ee[0], &hi ); lo2 = L_Extract_lc( ee[1], &hi2 ); @@ -129,6 +127,7 @@ Word16 signal_clas_fx( /* o : classification for current een = mac_r( C_EE_FX, een, K_EE_FX ); } } + /* compute zero crossing rate */ pt1 = speech + sub( L_look, 1 ); tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */ @@ -146,6 +145,7 @@ Word16 signal_clas_fx( /* o : classification for current pc = add( abs_s( sub( st->pitch[1], st->pitch[0] ) ), abs_s( sub( st->pitch[2], st->pitch[1] ) ) ); st->tdm_pc = pc; move16(); + /*-----------------------------------------------------------------* * Transform parameters to the range <0:1> * Compute the merit function @@ -273,8 +273,8 @@ Word16 signal_clas_fx( /* o : classification for current BREAK; } } - /* Onset classification */ + /* Onset classification */ /* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */ /* tc_cnt == 0: UC frame */ /* tc_cnt == 1: onset/transition frame, coded by GC coder type */ @@ -298,8 +298,10 @@ Word16 signal_clas_fx( /* o : classification for current st->tc_cnt = -1; move16(); } + return clas; } + #ifndef FIX_2404_HARM_SIGNAL_CLAS Word16 signal_clas_ivas_fx( /* o : classification for current frames */ Encoder_State *st, /* i/o: encoder state structure */ @@ -552,6 +554,7 @@ void select_TC_fx( * Select TC coder type for appropriate frames which is in general VOICED_TRANSITION, * VOICED_CLAS or ONSET frames following UNVOICED_CLAS frames *---------------------------------------------------------------------*/ + test(); IF( localVAD != 0 && GE_16( tc_cnt, 1 ) ) { @@ -573,21 +576,21 @@ void select_TC_fx( return; } + /*-------------------------------------------------------------------* * coder_type_modif_fx() * * Coder type modification *-------------------------------------------------------------------*/ + void coder_type_modif_fx( Encoder_State *st, /* i/o: encoder state structure */ const Word16 relE /* i : frame relative E to the long term average */ ) { Word16 unmod_coder_type, vbr_generic_ho; - SC_VBR_ENC_HANDLE hSC_VBR = st->hSC_VBR; - IF( st->Opt_SC_VBR ) { vbr_generic_ho = hSC_VBR->vbr_generic_ho; @@ -615,8 +618,8 @@ void coder_type_modif_fx( test(); test(); test(); - if ( ( st->element_mode == 0 && GT_32( st->total_brate, ACELP_9k60 ) && EQ_16( st->coder_type, UNVOICED ) ) || - ( st->element_mode > 0 && GT_32( st->total_brate, MAX_UNVOICED_BRATE ) && EQ_16( st->coder_type, UNVOICED ) ) ) + if ( ( st->element_mode == EVS_MONO && GT_32( st->total_brate, ACELP_9k60 ) && EQ_16( st->coder_type, UNVOICED ) ) || + ( st->element_mode > EVS_MONO && GT_32( st->total_brate, MAX_UNVOICED_BRATE ) && EQ_16( st->coder_type, UNVOICED ) ) ) { st->coder_type = GENERIC; move16(); @@ -642,8 +645,7 @@ void coder_type_modif_fx( test(); test(); test(); - if ( st->localVAD == 0 && ( ( - EQ_16( st->coder_type, UNVOICED ) && ( ( st->Opt_SC_VBR == 0 ) || ( ( EQ_16( st->Opt_SC_VBR, 1 ) ) && vbr_generic_ho == 0 && GT_16( st->last_coder_type, UNVOICED ) ) ) ) || + if ( st->localVAD == 0 && ( ( EQ_16( st->coder_type, UNVOICED ) && ( ( st->Opt_SC_VBR == 0 ) || ( ( EQ_16( st->Opt_SC_VBR, 1 ) ) && vbr_generic_ho == 0 && GT_16( st->last_coder_type, UNVOICED ) ) ) ) || EQ_16( st->coder_type, TRANSITION ) || EQ_16( st->coder_type, VOICED ) ) ) @@ -690,7 +692,7 @@ void coder_type_modif_fx( } } - IF( st->element_mode == 0 ) + IF( st->element_mode == EVS_MONO ) { /* At higher rates and with 16kHz core, allow only GC and TC coder type */ test(); -- GitLab From 053f46eaca249bbc9081e457849a775ef970fc54 Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 5 Feb 2026 19:42:46 +0100 Subject: [PATCH 5/6] update with main --- lib_com/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/options.h b/lib_com/options.h index 96de1f36f..6afb91c48 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -90,8 +90,8 @@ #define FIX_2280_REDUCTION_UNNECESSARY_SCALING /* VA: reduction of unnecessary scaling */ #define FIX_2280_REDUCTION_UNNECESSARY_SCALING_NONBE /* VA: reduction of unnecessary scaling, non-BE part */ #define FIX_2403_COMBINE_PITCH_OL /* VA : basop 2403, reusing common code between EVS and IVAS in pitch_ol */ -#define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ #define FIX_2404_HARM_SIGNAL_CLAS /* VA: basop-2404, harmonize signal_clas and signa_ivas_clas */ +#define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ /* #################### End BE switches ################################## */ -- GitLab From e104a5ec5f7c2bd1daa569812234ac5e48831c61 Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 5 Feb 2026 19:47:16 +0100 Subject: [PATCH 6/6] clang-format --- lib_enc/speech_music_classif_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_enc/speech_music_classif_fx.c b/lib_enc/speech_music_classif_fx.c index cd8c13a5f..9db4b6b04 100644 --- a/lib_enc/speech_music_classif_fx.c +++ b/lib_enc/speech_music_classif_fx.c @@ -1157,7 +1157,7 @@ static Word16 sp_mus_classif_gmm_fx( /* calculate weight based on relE (close to 0.01 in low-E regions, close to 1 in high-E regions) */ /*wrelE = 1.0f + relE/15;*/ wrelE = add( 2048, mult_r( relE, 17476 ) ); /* 1/15 in Q18 -> 17476 result in Q11 */ - + wrelE = s_min( wrelE, 2048 ); wrelE = s_max( wrelE, 20 ); -- GitLab