From 893305f6980abc23455e2f3f51119aa1cc5826ab Mon Sep 17 00:00:00 2001 From: malenov Date: Wed, 19 Apr 2023 16:49:50 +0200 Subject: [PATCH 1/2] increase the S_p2a threshold to prevent ACELP->TCX mis-classifications on male voices --- lib_com/cnst.h | 4 ++++ lib_com/options.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib_com/cnst.h b/lib_com/cnst.h index db1dc3d898..a3065a50f5 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -1513,7 +1513,11 @@ enum #define TOD_NSPEC 80 /* number of spectral bins of the tonal detector */ #define TOD_THR_MASS 0.86f /* initial value for the adaptive threshold of the tonal detector */ #define P2A_FACT 0.9f /* long-term averaging factor for peak-to-average ratio */ +#ifdef FIX_SP2A +#define THR_P2A 95.0f /* threshold to detect strongly peaky signals */ +#else #define THR_P2A 80.0f /* threshold to detect strongly peaky signals */ +#endif /*----------------------------------------------------------------------------------* * LD music post-filter constants diff --git a/lib_com/options.h b/lib_com/options.h index c188222e22..05b99a80c2 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -168,8 +168,8 @@ #define FIX_ISM_DTX_CLICKS /* FhG: fix for clicks in ISM DTX for inactive to active TCX transitions */ #define ISSUE_24_CLEANUP_MCT_LFE /* Issue 24: Cleanup LFE path withing MCT */ - #define FIX_401_DIRAC_RENDERER_META_READ_INDICES /* Nokia: Issue 401: Fix metadata reading indices in DirAC renderer. 
*/ +#define FIX_SP2A /* VA: Issue 412: Adjust threshold for the S_p2a feature in the tonal detector */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ -- GitLab From 185b0199bbafc65feab7a0c13d9780a7600ee843 Mon Sep 17 00:00:00 2001 From: malenov Date: Mon, 24 Apr 2023 12:43:25 +0200 Subject: [PATCH 2/2] limiting the impact to 13.2 and 16.4 kbps only --- lib_com/cnst.h | 5 ++--- lib_enc/speech_music_classif.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/lib_com/cnst.h b/lib_com/cnst.h index a3065a50f5..6533d91faf 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -1514,10 +1514,9 @@ enum #define TOD_THR_MASS 0.86f /* initial value for the adaptive threshold of the tonal detector */ #define P2A_FACT 0.9f /* long-term averaging factor for peak-to-average ratio */ #ifdef FIX_SP2A -#define THR_P2A 95.0f /* threshold to detect strongly peaky signals */ -#else -#define THR_P2A 80.0f /* threshold to detect strongly peaky signals */ +#define THR_P2A_HIGH 95.0f /* higher threshold to detect strongly peaky signals at low bitrates */ #endif +#define THR_P2A 80.0f /* lower threshold to detect strongly peaky signals at higher bitrates */ /*----------------------------------------------------------------------------------* * LD music post-filter constants diff --git a/lib_enc/speech_music_classif.c b/lib_enc/speech_music_classif.c index cf8568e649..80bd2d6d2a 100644 --- a/lib_enc/speech_music_classif.c +++ b/lib_enc/speech_music_classif.c @@ -1828,6 +1828,9 @@ void ivas_smc_mode_selection( float ton; int16_t i; float S_p2a, S_max, S_ave; +#ifdef FIX_SP2A + float thr_sp2a; +#endif SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; @@ -1858,8 +1861,23 @@ void ivas_smc_mode_selection( S_ave = sum_f( st->hSpMusClas->tod_lt_Bin_E, TOD_NSPEC ) / TOD_NSPEC; S_p2a = S_max - S_ave; +#ifdef FIX_SP2A + if ( element_brate <= IVAS_16k4 ) + { + thr_sp2a = THR_P2A_HIGH; + } + else + { + thr_sp2a = THR_P2A; 
+ } +#endif + /* initial 3-way selection of coding modes (ACELP/GSC/TCX) */ +#ifdef FIX_SP2A + if ( relE > -10.0f && ( S_p2a > thr_sp2a || ton > hSpMusClas->tod_thr_lt ) ) +#else if ( relE > -10.0f && ( S_p2a > THR_P2A || ton > hSpMusClas->tod_thr_lt ) ) +#endif { /* select TCX to encode extremely peaky signals or strongly tonal signals */ st->sp_aud_decision1 = 1; -- GitLab