diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 406fbcd27d810326b6eb597d060a4a5e9b3112ff..d0fd20fd3c1c34dceb51fffad4634d990a44101c 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -215,8 +215,14 @@ ivas_error pre_proc_front_ivas( const int16_t flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz */ const int16_t front_vad_flag, /* i : front-VAD flag to overwrite VAD decision */ const int16_t force_front_vad, /* i : flag to force VAD decision */ +#ifdef LOW_RATE_TRANS + const int16_t front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/ + const int32_t ivas_total_brate, /* i : IVAS total bitrate */ + const int16_t ivas_format /* i : IVAS format */ +#else const int16_t front_vad_dtx_flag /* i : front-VAD DTX flag to overwrite VAD decision*/ - ,const int32_t ivas_total_brate /* i : IVAS total bitrate */ + ,const int32_t ivas_total_brate /* i : IVAS total bitrate */ +#endif ); ivas_error pre_proc_ivas( @@ -621,6 +627,14 @@ void set_transient_stereo( float currFlatness[] /* i/o: current flatness */ ); +#ifdef LOW_RATE_TRANS +int16_t transient_analysis( + TRAN_DET_HANDLE hTranDet, /* i : handle transient detection */ + const float cor_map_LT[], /* i : LT correlation map */ + const float multi_harm_limit /* i : multi harminic threshold */ +); +#endif + void ivas_post_proc( SCE_DEC_HANDLE hSCE, /* i/o: SCE decoder structure */ CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ diff --git a/lib_com/options.h b/lib_com/options.h index 7ef067e7bab9b5ed06e589f9d5e8c00a7677ea69..551e0e1f58562825cb353296b8604ff10dac3cb8 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -155,6 +155,7 @@ #define FIX_I59_DELAY_ROUNDING /* Issue 59: rounding in sample domain instead of nanosec for IVAS_ENC_GetDelay() and IVAS_DEC_GetDelay() */ #define FIX_FIX_I59 /* Issue 59: small fix concerning LFE delay rounding */ #define FIX_I59_CREND /* Issue 59: Fixes for gcc compiler warnings in ivas_crend_utest_utils.c */ +#define LOW_RATE_TRANS /* Eri: Contribution 20: low rate encoding of transients */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_enc/ivas_core_pre_proc_front.c b/lib_enc/ivas_core_pre_proc_front.c index 1fdb26805c24dc4a4d1a2216f90c89985489c007..fffec70b0540443fe625c81393ba599b34705450 100644 --- a/lib_enc/ivas_core_pre_proc_front.c +++ b/lib_enc/ivas_core_pre_proc_front.c @@ -45,6 +45,14 @@ #include +#ifdef LOW_RATE_TRANS +/*---------------------------------------------------------------* + * Local constants + *---------------------------------------------------------------*/ + +#define SCE_SMC_THR 16000 +#endif + /*-------------------------------------------------------------------* * Local function prototypes *--------------------------------------------------------------------*/ @@ -100,9 +108,15 @@ ivas_error pre_proc_front_ivas( const int16_t flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz */ const int16_t front_vad_flag, /* i : front-VAD flag to overwrite VAD decision */ const int16_t force_front_vad, /* i : flag to force VAD decision */ - const int16_t front_vad_dtx_flag /* i : front-VAD DTX flag to overwrite VAD decision*/ +#ifdef LOW_RATE_TRANS + const int16_t front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/ + const int32_t ivas_total_brate, /* i : IVAS total bitrate - for setting the DTX */ + const int16_t ivas_format /* i : IVAS format */ +#else + const int16_t front_vad_dtx_flag /* i : front-VAD DTX flag to overwrite VAD decision*/ , const int32_t ivas_total_brate /* i : IVAS total bitrate - for setting the DTX */ +#endif ) { float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */ @@ -784,7 +798,11 @@ ivas_error pre_proc_front_ivas( else if ( element_mode != IVAS_CPE_MDCT ) { /* SNR-based speech/music classification */ +#ifdef LOW_RATE_TRANS + if ( ( element_mode >= IVAS_CPE_DFT && element_brate >= IVAS_24k4 ) || ( element_mode == IVAS_SCE && element_brate >= SCE_SMC_THR ) ) +#else if ( ( element_mode >= IVAS_CPE_DFT && element_brate >= IVAS_24k4 ) || ( element_mode == IVAS_SCE && element_brate >= 16000 ) ) +#endif { if ( flag_16k_smc ) { @@ -802,7 +820,29 @@ ivas_error pre_proc_front_ivas( smc_dec = ivas_acelp_tcx20_switching( st, inp_12k8, wsp, non_staX, pitch_fr, voicing_fr, currFlatness, lsp_mid, stab_fac, res_cod_SNR_M, flag_16k_smc ); } } - +#ifdef LOW_RATE_TRANS + /* Switch to ACELP for non-harmonic transient signals */ + else if ( ( ( ivas_format == STEREO_FORMAT && element_brate <= IVAS_16k4 ) || ( ivas_format == ISM_FORMAT && element_brate < SCE_SMC_THR ) ) && ( loc_harm[0] != 1 ) && smc_dec == MUSIC ) + { + if ( element_mode == IVAS_SCE ) + { + if ( transient_analysis( st->hTranDet, st->hNoiseEst->cor_map, st->hNoiseEst->multi_harm_limit ) ) + { + smc_dec = SPEECH; + } + } + else if ( element_mode == IVAS_CPE_DFT ) + { + for ( i = 0; i < CPE_CHANNELS; i++ ) + { + if ( smc_dec != SPEECH && transient_analysis( hCPE->hCoreCoder[i]->hTranDet, st->hNoiseEst->cor_map, st->hNoiseEst->multi_harm_limit ) ) + { + smc_dec = SPEECH; /* overwrite initial music decision, intial SPEECH_MUSIC never changed */ + } + } + } + } +#endif /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */ ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, *Etot, attack_flag, inp_12k8, S_map, flag_spitch ); diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index b422bf8c0c57a55829095898815054534267ee78..5735de87d0e9b97dbb8d28b4394a874213fff9f5 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -426,10 +426,17 @@ ivas_error ivas_cpe_enc( for ( n = 0; n < n_CoreChannels; n++ ) { +#ifdef LOW_RATE_TRANS + error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n], &Etot[n], &ener[n], &relE[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], + &vad_hover_flag[n], &attack_flag[n], realBuffer[n], imagBuffer[n], old_wsp[n], pitch_fr[n], voicing_fr[n], &loc_harm[n], &cor_map_sum[n], &vad_flag_dtx[n], enerBuffer[n], + fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, + ivas_total_brate, st_ivas->hEncoderConfig->ivas_format ); +#else error = pre_proc_front_ivas( NULL, hCPE, hCPE->element_brate, nb_bits_metadata, input_frame, n, old_inp_12k8[n], old_inp_16k[n], &Etot[n], &ener[n], &relE[n], A[n], Aw[n], epsP[n], lsp_new[n], lsp_mid[n], &vad_hover_flag[n], &attack_flag[n], realBuffer[n], imagBuffer[n], old_wsp[n], pitch_fr[n], voicing_fr[n], &loc_harm[n], &cor_map_sum[n], &vad_flag_dtx[n], enerBuffer[n], fft_buff[n], A[0], lsp_new[0], currFlatness[n], tdm_ratio_idx, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, band_energies_LR, 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, 0, 0, ivas_total_brate ); +#endif if ( error != IVAS_ERR_OK ) { return error; diff --git a/lib_enc/ivas_ism_enc.c b/lib_enc/ivas_ism_enc.c index 39d5b42a07fbc7adbfe56496949beed2abbeceba..33fef080a4168759ba50afd340fc64b3807fe534 100644 --- a/lib_enc/ivas_ism_enc.c +++ b/lib_enc/ivas_ism_enc.c @@ -147,11 +147,19 @@ ivas_error ivas_ism_enc( * Front Pre-processing *----------------------------------------------------------------*/ +#ifdef LOW_RATE_TRANS + error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata[sce_id], input_frame, 0, old_inp_12k8[sce_id][0], old_inp_16k[sce_id][0], + &Etot[sce_id][0], &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], + &vad_hover_flag[sce_id][0], &attack_flag[sce_id][0], realBuffer[sce_id][0], imagBuffer[sce_id][0], old_wsp[sce_id][0], pitch_fr[sce_id][0], voicing_fr[sce_id][0], &loc_harm[sce_id][0], &cor_map_sum[sce_id][0], &vad_flag_dtx[sce_id][0], enerBuffer[sce_id][0], + fft_buff[sce_id][0], A[sce_id][0], lsp_new[sce_id][0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, 0, 0, 0, 0, + st_ivas->hEncoderConfig->ivas_format, st_ivas->hEncoderConfig->ivas_total_brate ); +#else error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata[sce_id], input_frame, 0, old_inp_12k8[sce_id][0], old_inp_16k[sce_id][0], &Etot[sce_id][0], &ener[sce_id][0], &relE[sce_id][0], A[sce_id][0], Aw[sce_id][0], epsP[sce_id][0], lsp_new[sce_id][0], lsp_mid[sce_id][0], &vad_hover_flag[sce_id][0], &attack_flag[sce_id][0], realBuffer[sce_id][0], imagBuffer[sce_id][0], old_wsp[sce_id][0], pitch_fr[sce_id][0], voicing_fr[sce_id][0], &loc_harm[sce_id][0], &cor_map_sum[sce_id][0], &vad_flag_dtx[sce_id][0], enerBuffer[sce_id][0], fft_buff[sce_id][0], A[sce_id][0], lsp_new[sce_id][0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, 0, 0, 0, 0, st_ivas->hEncoderConfig->ivas_total_brate ); +#endif if ( error != IVAS_ERR_OK ) { return error; diff --git a/lib_enc/ivas_sce_enc.c b/lib_enc/ivas_sce_enc.c index 2a74fcf68d5c261a81a43fd12c6974684a98440f..428dbb95c16cc51c18e09cd1fa50d11e9f73d8cb 100644 --- a/lib_enc/ivas_sce_enc.c +++ b/lib_enc/ivas_sce_enc.c @@ -181,12 +181,21 @@ ivas_error ivas_sce_enc( * Front Pre-processing *----------------------------------------------------------------*/ +#ifdef LOW_RATE_TRANS + error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata, input_frame, 0, old_inp_12k8[0], old_inp_16k[0], + &Etot[0], &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], + &vad_hover_flag[0], &attack_flag[0], realBuffer[0], imagBuffer[0], old_wsp[0], pitch_fr[0], voicing_fr[0], &loc_harm[0], &cor_map_sum[0], &vad_flag_dtx[0], enerBuffer[0], + fft_buff[0], A[0], lsp_new[0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, flag_16k_smc, + st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->force_front_vad : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_dtx_flag : 0, + st_ivas->hEncoderConfig->ivas_total_brate, st_ivas->hEncoderConfig->ivas_format ); +#else error = pre_proc_front_ivas( hSCE, NULL, hSCE->element_brate, nb_bits_metadata, input_frame, 0, old_inp_12k8[0], old_inp_16k[0], &Etot[0], &ener[0], &relE[0], A[0], Aw[0], epsP[0], lsp_new[0], lsp_mid[0], &vad_hover_flag[0], &attack_flag[0], realBuffer[0], imagBuffer[0], old_wsp[0], pitch_fr[0], voicing_fr[0], &loc_harm[0], &cor_map_sum[0], &vad_flag_dtx[0], enerBuffer[0], fft_buff[0], A[0], lsp_new[0], currFlatness[0], 0, fr_bands, Etot_LR, lf_E, localVAD_HE_SAD, NULL, flag_16k_smc, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_flag : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->force_front_vad : 0, st_ivas->hSpar != NULL ? st_ivas->hSpar->front_vad_dtx_flag : 0, st_ivas->hEncoderConfig->ivas_total_brate ); +#endif if ( error != IVAS_ERR_OK ) { return error; diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index de0451d3bc63d17a62bf119c6059203aed678ab0..8f8c054cff948b67ed1cbcd31876be6720a054bb 100644 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -123,6 +123,10 @@ typedef struct float firState1; float firState2; +#ifdef LOW_RATE_TRANS + int16_t ramp_up_flag; /* bit map flags to indicate a ramp up in beginning of TCX frame */ +#endif + } SubblockEnergies; diff --git a/lib_enc/transient_detection.c b/lib_enc/transient_detection.c index 469c7c4d8d0f0bb10ff49837504a3455a8309bd4..2b10e7cbe4076b39ce983fd21b935e4863e032af 100644 --- a/lib_enc/transient_detection.c +++ b/lib_enc/transient_detection.c @@ -53,6 +53,13 @@ #define MIN_BLOCK_ENERGY 107.37f +#ifdef LOW_RATE_TRANS +#define THR_HIGH 8.5f +#define THR_NORM_HIGH 8.0f +#define THR_NORM_LOW 4.5f +#define THR_LOW 4.25f +#define THR_LOW_STEP 1.0f +#endif /*---------------------------------------------------------------* * Local function prototypes @@ -218,6 +225,10 @@ void RunTransientDetection( float filteredInput[L_FRAME_MAX]; SubblockEnergies *pSubblockEnergies = &hTranDet->subblockEnergies; TransientDetector *pTransientDetector = &hTranDet->transientDetector; +#ifdef LOW_RATE_TRANS + float e0; + float e1; +#endif assert( ( input != NULL ) && ( hTranDet != NULL ) && ( pSubblockEnergies != NULL ) && ( pTransientDetector != NULL ) ); @@ -233,6 +244,17 @@ void RunTransientDetection( /* Update the delay buffer. */ UpdateDelayBuffer( filteredInput, length, &hTranDet->delayBuffer ); +#ifdef LOW_RATE_TRANS + /* compute ramp up flag */ + pSubblockEnergies->ramp_up_flag = pSubblockEnergies->ramp_up_flag << 1; + e0 = dotp( filteredInput + length / 2, filteredInput + length / 2, pSubblockEnergies->pDelayBuffer->nSubblockSize / 2 ) + 0.5f * MIN_BLOCK_ENERGY; + e1 = pSubblockEnergies->subblockNrg[pSubblockEnergies->nDelay + 4] - e0; + if ( e1 > e0 ) + { + pSubblockEnergies->ramp_up_flag |= 0x0001; + } +#endif + return; } @@ -557,6 +579,9 @@ static void InitTransientDetector( pTransientDetector->attackRatioThreshold = attackRatioThreshold; pTransientDetector->bIsAttackPresent = FALSE; pTransientDetector->attackIndex = -1; +#ifdef LOW_RATE_TRANS + pTransientDetector->pSubblockEnergies->ramp_up_flag = 0x0; +#endif return; } @@ -824,3 +849,100 @@ void set_transient_stereo( return; } + +#ifdef LOW_RATE_TRANS +/*-------------------------------------------------------------------* + * transient_analysis() + * + * + *-------------------------------------------------------------------*/ +/*! r: preliminary flag to force ACELP */ +int16_t transient_analysis( + TRAN_DET_HANDLE hTranDet, /* i : handle transient detection */ + const float cor_map_LT[], /* i : LT correlation map */ + const float multi_harm_limit /* i : multi harminic threshold */ +) +{ + const float *pSubblockNrg; + float accSubblockNrgRev[NSUBBLOCKS]; /* store acc Nrg in reversed signal */ + float *pTmp; /* point to acc Nrg */ + int16_t offset; + int16_t i; + float thr_fwd; + float thr_rev; + const int16_t nRelativeDelay = hTranDet->subblockEnergies.nDelay - hTranDet->transientDetector.nDelay; + int16_t prel_force_td; + float cor_map_LT_sum; + + pTmp = &accSubblockNrgRev[NSUBBLOCKS - 1]; + offset = nRelativeDelay - 4; + prel_force_td = FALSE; + + /* summation of the LT correlation map */ + cor_map_LT_sum = sum_f( cor_map_LT, L_FFT / 2 ); /* Note maybe BE optimized by computing inside noise_est */ + + thr_fwd = THR_NORM_HIGH; + if ( cor_map_LT_sum > multi_harm_limit * 0.8f ) + { + thr_fwd = THR_HIGH; + } + thr_rev = THR_LOW; + if ( cor_map_LT_sum > multi_harm_limit * 0.6f ) + { + thr_rev = THR_NORM_LOW; + } + + /* forward attack analysis */ + for ( i = -2; i < 7; i++ ) + { + if ( hTranDet->subblockEnergies.subblockNrg[nRelativeDelay + i] > hTranDet->subblockEnergies.accSubblockNrg[nRelativeDelay + i] * thr_fwd ) + { + prel_force_td |= 0x0001; + } + } + + if ( prel_force_td == 0 ) + { + /* release analysis */ + pSubblockNrg = hTranDet->transientDetector.pSubblockEnergies->subblockNrg; + set_zero( accSubblockNrgRev, NSUBBLOCKS ); + + for ( i = NSUBBLOCKS - 1; i > -1; i-- ) + { + if ( i == NSUBBLOCKS - 1 ) + { + accSubblockNrgRev[i] = pSubblockNrg[i + offset]; + } + else + { + accSubblockNrgRev[i] = *pTmp; + *pTmp *= hTranDet->transientDetector.pSubblockEnergies->facAccSubblockNrg; + if ( pSubblockNrg[i + offset] > *pTmp ) + { + *pTmp = pSubblockNrg[i + offset]; + } + } + } + + /* -3 check */ + if ( pSubblockNrg[1 + offset] > accSubblockNrgRev[1] * thr_rev ) + { + prel_force_td |= 0x0002; + } + + /* -4 check */ + if ( prel_force_td == 0 && pSubblockNrg[offset] > accSubblockNrgRev[0] * thr_rev ) + { + if ( pSubblockNrg[offset] > accSubblockNrgRev[0] * ( thr_rev + THR_LOW_STEP ) ) + { + prel_force_td |= 0x0004; + } + else if ( ( hTranDet->subblockEnergies.ramp_up_flag & 0x0002 ) != 0 ) + { + prel_force_td |= 0x0008; + } + } + } + return prel_force_td != 0; +} +#endif