From 8989debeb2893b0ea76eed959d6f65c0b281dbeb Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Tue, 21 Oct 2025 07:56:36 +0200 Subject: [PATCH 1/5] fix for suboptimal spatial CNG in MDCT-Stereo DTX --- lib_com/bitstream.c | 3 +- lib_com/fd_cng_com.c | 5 + lib_com/ivas_cnst.h | 5 + lib_com/ivas_prot.h | 2 + lib_com/ivas_rom_com.c | 6 + lib_com/ivas_rom_com.h | 3 + lib_com/options.h | 1 + lib_com/stat_com.h | 7 +- lib_dec/fd_cng_dec.c | 157 +++++++++++++++++++++++++- lib_dec/ivas_decision_matrix_dec.c | 4 + lib_dec/ivas_ism_metadata_dec.c | 22 ++++ lib_dec/ivas_ism_param_dec.c | 4 + lib_dec/ivas_stereo_mdct_stereo_dec.c | 4 + lib_enc/fd_cng_enc.c | 99 +++++++++++++++- lib_enc/stat_enc.h | 4 + 15 files changed, 322 insertions(+), 4 deletions(-) diff --git a/lib_com/bitstream.c b/lib_com/bitstream.c index 5309e6735d..bd8bf608de 100644 --- a/lib_com/bitstream.c +++ b/lib_com/bitstream.c @@ -5409,7 +5409,7 @@ void evs_dec_previewFrame( return; } - +#ifndef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG void dtx_read_padding_bits( DEC_CORE_HANDLE st, @@ -5425,3 +5425,4 @@ void dtx_read_padding_bits( return; } +#endif diff --git a/lib_com/fd_cng_com.c b/lib_com/fd_cng_com.c index 4f57ceaed9..608e2ed50e 100644 --- a/lib_com/fd_cng_com.c +++ b/lib_com/fd_cng_com.c @@ -34,6 +34,7 @@ EVS Codec 3GPP TS26.443 Nov 04, 2021. 
Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0 ====================================================================================*/ +#include "ivas_cnst.h" #include #include #include "options.h" @@ -140,7 +141,11 @@ void initFdCngCom( hFdCngCom->msMinBufferPtr = 0; set_f( hFdCngCom->msAlphaCor, 0.3f, 2 ); +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + set_f( hFdCngCom->coherence, 0.5f, MDCT_ST_DTX_NUM_COHERENCE_BANDS ); +#else hFdCngCom->coherence = 0.5f; +#endif return; } diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index 6f679f0fd8..e16553e85a 100755 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -850,6 +850,11 @@ enum fea_names #define MDCT_ST_PLC_FADEOUT_TO_ZERO_LEN 20 #define MDCT_ST_PLC_FADEOUT_DELAY_4_LSP_FADE 3 +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG +#define MDCT_ST_DTX_NUM_COHERENCE_BANDS 5 +#define MDCT_ST_DTX_FIRST_BAND_OFFSET 2 +#endif + typedef enum { NOISE_GEN_MODE_UNDEF = -1, EQUAL_CORES = 0, diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index eb75ab124d..bd168207f0 100755 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -793,12 +793,14 @@ Word16 matrix_product_q30_fx( Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Q30*/ ); +#ifndef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG void dtx_read_padding_bits( DEC_CORE_HANDLE st, const int16_t num_bits ); +#endif void ivas_apply_non_diegetic_panning( float *input_f, /* i : non-diegetic object */ float *output_f[], /* o : core-coder transport mono channel/stereo output */ diff --git a/lib_com/ivas_rom_com.c b/lib_com/ivas_rom_com.c index 237583bb58..75098e78f5 100644 --- a/lib_com/ivas_rom_com.c +++ b/lib_com/ivas_rom_com.c @@ -793,6 +793,12 @@ const float nf_tw_smoothing_coeffs[N_LTP_GAIN_MEMS] = 0.4f,0.2f,0.2f,0.2f }; +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG +const int16_t mdct_stereo_dtx_coherence_bandlengths[MDCT_ST_DTX_NUM_COHERENCE_BANDS] = { + 6, /* 8 - MDCT_ST_DTX_FIRST_BAND_OFFSET */ 8, 16, 16, 80 +}; +#endif + 
/*----------------------------------------------------------------------------------* * Stereo DTX tables diff --git a/lib_com/ivas_rom_com.h b/lib_com/ivas_rom_com.h index c68e16ba9a..b90a30699f 100644 --- a/lib_com/ivas_rom_com.h +++ b/lib_com/ivas_rom_com.h @@ -138,6 +138,9 @@ extern const SpectrumWarping sw32000Hz[]; /* PsychLPC */ extern const MDCTStereoBands_config mdctStereoBands_32000_640[]; extern const float nf_tw_smoothing_coeffs[N_LTP_GAIN_MEMS]; +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG +extern const int16_t mdct_stereo_dtx_coherence_bandlengths[MDCT_ST_DTX_NUM_COHERENCE_BANDS]; +#endif /*----------------------------------------------------------------------------------* * Stereo DTX ROM tables diff --git a/lib_com/options.h b/lib_com/options.h index 151c651bb8..b341621ded 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -182,6 +182,7 @@ #define NONBE_1399_1400_FIX_OBJ_EDIT_ISSUES /* Nokia: Fix for issues 1399: obj edit broken with MC/SBA output in VOIP, and 1400: negative energy estimate used for gaining. 
*/ #define NONBE_1412_AVOID_ROUNDING_AZ_ELEV /* FhG: Avoid rounding when passing azimuth and elevation to efap_determine_gains() */ +#define NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG /* FhG: Fix MDCT-Stereo comfort noise for certain noise types */ /* ##################### End NON-BE switches ########################### */ diff --git a/lib_com/stat_com.h b/lib_com/stat_com.h index 4f459ef531..2eb9b5ee95 100644 --- a/lib_com/stat_com.h +++ b/lib_com/stat_com.h @@ -38,6 +38,7 @@ #ifndef STAT_COM_H #define STAT_COM_H +#include "ivas_cnst.h" #include #include "options.h" #include "typedef.h" @@ -404,7 +405,11 @@ typedef struct int16_t flag_noisy_speech; float likelihood_noisy_speech; - float coherence; /* inter-channel coherence of noise */ +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + float coherence[MDCT_ST_DTX_NUM_COHERENCE_BANDS]; /* inter-channel coherence of noise */ +#else + float coherence; /* inter-channel coherence of noise */ +#endif int16_t no_side_flag; /* indicates whether the side noise shape should be zeroed-out or not */ } FD_CNG_COM, *HANDLE_FD_CNG_COM; diff --git a/lib_dec/fd_cng_dec.c b/lib_dec/fd_cng_dec.c index 535af8b39c..c34364f090 100644 --- a/lib_dec/fd_cng_dec.c +++ b/lib_dec/fd_cng_dec.c @@ -34,6 +34,8 @@ EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0 ====================================================================================*/ +#include "ivas_cnst.h" +#include "ivas_rom_com.h" #include #include #include "options.h" @@ -422,7 +424,7 @@ void ApplyFdCng( /* set noise estimation inactive when we have bit errors, as no update with noise generated by corrupt frame (biterror) should be performed. 
*/ if ( concealWholeFrame == 0 && ( timeDomainInput == NULL || - ( *timeDomainInput<FLT_MAX && *timeDomainInput>( -FLT_MAX ) && + ( *timeDomainInput < FLT_MAX && *timeDomainInput > ( -FLT_MAX ) && *( timeDomainInput + hFdCngCom->frameSize - 1 ) < FLT_MAX && *( timeDomainInput + hFdCngCom->frameSize - 1 ) > ( -FLT_MAX ) ) ) && ( ( ( ( st->element_mode != IVAS_CPE_TD && st->element_mode != IVAS_CPE_DFT && hFdCngDec->flag_dtx_mode ) || !st->VAD || ( st->ini_frame < 100 && st->is_ism_format ) ) && @@ -1100,8 +1102,13 @@ void generate_comfort_noise_dec( scale = 1.f; scaleCldfb = CLDFB_SCALING / hFdCngCom->scalingFactor; +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + c1 = (float) sqrt( hFdCngCom->coherence[0] ); + c2 = (float) sqrt( 1 - hFdCngCom->coherence[0] ); +#else c1 = (float) sqrt( hFdCngCom->coherence ); c2 = (float) sqrt( 1 - hFdCngCom->coherence ); +#endif seed2 = &( hFdCngCom->seed2 ); if ( st->element_mode == IVAS_CPE_MDCT && st->idchan == 1 ) @@ -1114,7 +1121,11 @@ void generate_comfort_noise_dec( if ( hFdCngCom->startBand == 0 ) { +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + if ( ( st->element_mode == IVAS_CPE_MDCT && nchan_out != 1 ) || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#else if ( st->element_mode == IVAS_CPE_MDCT || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#endif { rand_gauss( &tmp1, seed ); rand_gauss( &tmp2, seed2 ); @@ -1136,10 +1147,102 @@ void generate_comfort_noise_dec( } ptr_i = ptr_r + 1; +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + if ( st->element_mode == IVAS_CPE_MDCT && nchan_out != 1 ) + { + int16_t band_len_accu; + + band_len_accu = 0; + i = 0; + for ( int16_t b = 0; b < MDCT_ST_DTX_NUM_COHERENCE_BANDS; b++ ) + { + band_len_accu += mdct_stereo_dtx_coherence_bandlengths[b]; + + /* First band needs to be shortened. 
The offset from encoder-side estimation is already in, so add it back here */ + if ( b == 0 ) + { + band_len_accu += MDCT_ST_DTX_FIRST_BAND_OFFSET - hFdCngCom->startBand; + } + + /* + * for last band, we need to keep going until the end of the fft section - if there is still any + * this way, the coherence value of the last band is used for eveyrthing above as well + */ + if ( b == MDCT_ST_DTX_NUM_COHERENCE_BANDS - 1 ) + { + band_len_accu = max( band_len_accu, hFdCngCom->stopFFTbin - hFdCngCom->startBand ); + } + + /* mixing values for coherence is now frequency-dependent */ + c1 = (float) sqrt( hFdCngCom->coherence[b] ); + c2 = (float) sqrt( 1 - hFdCngCom->coherence[b] ); + + for ( ; i < band_len_accu; i++ ) + { + float val_level; + val_level = (float) sqrt( ( scale * *ptr_level ) * 0.5f ); + + /* Real part in FFT bins */ + rand_gauss( &tmp1, seed ); + rand_gauss( &tmp2, seed2 ); + *ptr_r = tmp1 * c1 + tmp2 * c2; + ( *ptr_r ) *= val_level; + + /* Imaginary part in FFT bins */ + rand_gauss( &tmp1, seed ); + rand_gauss( &tmp2, seed2 ); + *ptr_i = tmp1 * c1 + tmp2 * c2; + ( *ptr_i ) *= val_level; + + /* advance all pointers together here */ + ptr_r += 2; + ptr_i += 2; + ptr_level++; + } + } + } + else + { + for ( ; ptr_level < cngNoiseLevel + hFdCngCom->stopFFTbin - hFdCngCom->startBand; ptr_level++ ) + { + /* Real part in FFT bins */ + if ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) + { + rand_gauss( &tmp1, seed ); + rand_gauss( &tmp2, seed2 ); + *ptr_r = tmp1 * c1 + tmp2 * c2; + } + else + { + rand_gauss( ptr_r, seed ); + } + ( *ptr_r ) *= (float) sqrt( ( scale * *ptr_level ) * 0.5f ); + ptr_r += 2; + + /* Imaginary part in FFT bins */ + if ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) + { + rand_gauss( &tmp1, seed ); + rand_gauss( &tmp2, seed2 ); + *ptr_i = tmp1 * c1 + tmp2 * c2; + } + else + { + rand_gauss( ptr_i, seed ); + } + ( *ptr_i ) *= (float) sqrt( ( scale * *ptr_level ) * 0.5f ); + ptr_i += 2; + } + } +#else for ( ; ptr_level < 
cngNoiseLevel + hFdCngCom->stopFFTbin - hFdCngCom->startBand; ptr_level++ ) { /* Real part in FFT bins */ +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + if ( ( st->element_mode == IVAS_CPE_MDCT && nchan_out != 1 ) || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#else if ( st->element_mode == IVAS_CPE_MDCT || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#endif { rand_gauss( &tmp1, seed ); rand_gauss( &tmp2, seed2 ); @@ -1153,7 +1256,11 @@ void generate_comfort_noise_dec( ptr_r += 2; /* Imaginary part in FFT bins */ +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + if ( ( st->element_mode == IVAS_CPE_MDCT && nchan_out != 1 ) || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#else if ( st->element_mode == IVAS_CPE_MDCT || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#endif { rand_gauss( &tmp1, seed ); rand_gauss( &tmp2, seed2 ); @@ -1166,6 +1273,7 @@ void generate_comfort_noise_dec( ( *ptr_i ) *= (float) sqrt( ( scale * *ptr_level ) * 0.5f ); ptr_i += 2; } +#endif /* Remaining FFT bins are set to zero */ set_f( fftBuffer + 2 * hFdCngCom->stopFFTbin, 0.0f, hFdCngCom->fftlen - 2 * hFdCngCom->stopFFTbin ); @@ -1224,6 +1332,13 @@ void generate_comfort_noise_dec( /* Generate Gaussian random noise in real and imaginary parts of the CLDFB bands Amplitudes are adjusted to the estimated noise level cngNoiseLevel in each band */ +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + /* + * Note: for the stereo DTX noise mixing, c1 and c2 at this point are set to the value calculated for the last band + * as all the coherence bands are in the FFT region, we do not need the special handling here + */ +#endif + if ( bufferReal != NULL && hFdCngCom->numCoreBands < hFdCngCom->regularStopBand ) { for ( j = hFdCngCom->numCoreBands; j < hFdCngCom->regularStopBand; j++ ) @@ -1231,7 +1346,11 @@ void generate_comfort_noise_dec( for ( i = 0; i < hFdCngCom->numSlots; i++ ) { /* Real part in CLDFB band */ +#ifdef 
NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + if ( ( st->element_mode == IVAS_CPE_MDCT && nchan_out != 1 ) || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#else if ( st->element_mode == IVAS_CPE_MDCT || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#endif { rand_gauss( &tmp1, seed ); rand_gauss( &tmp2, seed2 ); @@ -1244,7 +1363,11 @@ void generate_comfort_noise_dec( bufferReal[i][j] *= (float) sqrt( ( scaleCldfb * *ptr_level ) * 0.5f ); /* Imaginary part in CLDFB band */ +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + if ( ( st->element_mode == IVAS_CPE_MDCT && nchan_out != 1 ) || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#else if ( st->element_mode == IVAS_CPE_MDCT || ( st->element_mode == IVAS_SCE && st->cng_ism_flag ) ) +#endif { rand_gauss( &tmp1, seed ); rand_gauss( &tmp2, seed2 ); @@ -1361,8 +1484,14 @@ void generate_comfort_noise_dec_hf( { seed2 = &( hFdCngCom->seed2 ); +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + /* alwas use the value for the last band - frequency-wise we are here always above */ + c1 = (float) sqrt( hFdCngCom->coherence[MDCT_ST_DTX_NUM_COHERENCE_BANDS - 1] ); + c2 = (float) sqrt( 1 - hFdCngCom->coherence[MDCT_ST_DTX_NUM_COHERENCE_BANDS - 1] ); +#else c1 = (float) sqrt( hFdCngCom->coherence ); c2 = (float) sqrt( 1 - hFdCngCom->coherence ); +#endif } ptr_level = hFdCngCom->cngNoiseLevel + hFdCngCom->stopFFTbin - hFdCngCom->startBand; @@ -2156,7 +2285,25 @@ void FdCngDecodeMDCTStereoSID( msvq_dec( cdk_37bits_ivas, NULL, NULL, stages, N, FD_CNG_maxN_37bits, indices, 1, invTrfMatrix, ms_ptr[ch], NULL ); } +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + /* TODO: temporary hack, need to decide what to do with core-coder bitrate */ + int32_t tmp; + + tmp = sts[1]->total_brate; + sts[1]->total_brate = sts[1]->total_brate + 16 * FRAMES_PER_SEC; + /* read the four additional coherence values */ + for ( int16_t b = 1; b < MDCT_ST_DTX_NUM_COHERENCE_BANDS; b++ ) + { + uint16_t tmp_bit; + + tmp_bit = 
get_next_indice( sts[1], 4 ); + sts[0]->hFdCngDec->hFdCngCom->coherence[b] = (float) tmp_bit / 15.f; + sts[1]->hFdCngDec->hFdCngCom->coherence[b] = sts[0]->hFdCngDec->hFdCngCom->coherence[b]; + } + sts[1]->total_brate = tmp; +#else dtx_read_padding_bits( sts[1], ( IVAS_SID_5k2 - 4400 ) / FRAMES_PER_SEC ); +#endif if ( sts[0]->hFdCngDec->hFdCngCom->no_side_flag ) { @@ -2267,8 +2414,16 @@ void FdCngDecodeDiracMDCTStereoSID( lpc_from_spectrum( hFdCngCom, hFdCngCom->startBand, hFdCngCom->stopFFTbin, sts[ch]->preemph_fac ); } +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + for ( int16_t i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + { + sts[0]->hFdCngDec->hFdCngCom->coherence[i] = 0.0f; + sts[1]->hFdCngDec->hFdCngCom->coherence[i] = 0.0f; + } +#else sts[0]->hFdCngDec->hFdCngCom->coherence = 0.0f; sts[1]->hFdCngDec->hFdCngCom->coherence = 0.0f; +#endif if ( hCPE->nchan_out == 1 ) { diff --git a/lib_dec/ivas_decision_matrix_dec.c b/lib_dec/ivas_decision_matrix_dec.c index 47e8627865..da624d5a66 100644 --- a/lib_dec/ivas_decision_matrix_dec.c +++ b/lib_dec/ivas_decision_matrix_dec.c @@ -104,7 +104,11 @@ void ivas_decision_matrix_dec( else if ( st->total_brate == SID_2k40 && st->idchan == 1 && st->element_mode == IVAS_CPE_MDCT && st->cng_sba_flag == 0 ) { /* read channel coherence */ +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + st->hFdCngDec->hFdCngCom->coherence[0] = (float) get_next_indice( st, 4 ) / 15.f; +#else st->hFdCngDec->hFdCngCom->coherence = (float) get_next_indice( st, 4 ) / 15.f; +#endif /* read flag for no side noise shape */ st->hFdCngDec->hFdCngCom->no_side_flag = get_next_indice( st, 1 ); diff --git a/lib_dec/ivas_ism_metadata_dec.c b/lib_dec/ivas_ism_metadata_dec.c index 1a1e4161d4..d81b77de53 100644 --- a/lib_dec/ivas_ism_metadata_dec.c +++ b/lib_dec/ivas_ism_metadata_dec.c @@ -1057,18 +1057,40 @@ void ivas_ism_metadata_sid_dec( { if ( ch == *sce_id_dtx ) { +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + for ( int16_t i = 0; i < 
MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + { + hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[i] = 1.0f; + } +#else hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence = 1.0f; +#endif continue; } idx = get_next_indice( st0, nBits_coh ); +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[0] = (float) ( idx ) / (float) ( ( 1 << nBits_coh ) - 1 ); + for ( int16_t i = 1; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + { + hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[i] = hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[0]; + } +#else hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence = (float) ( idx ) / (float) ( ( 1 << nBits_coh ) - 1 ); +#endif } } if ( ism_mode == ISM_MODE_PARAM ) { +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + for ( int16_t i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + { + hSCE[*sce_id_dtx]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[i] = hSCE[!*sce_id_dtx]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[i]; + } +#else hSCE[*sce_id_dtx]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence = hSCE[!*sce_id_dtx]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence; +#endif } /*----------------------------------------------------------------* diff --git a/lib_dec/ivas_ism_param_dec.c b/lib_dec/ivas_ism_param_dec.c index 253f480c6a..0c51b5216b 100644 --- a/lib_dec/ivas_ism_param_dec.c +++ b/lib_dec/ivas_ism_param_dec.c @@ -1288,7 +1288,11 @@ void ivas_param_ism_params_to_masa_param_mapping( if ( st_ivas->hISMDTX.dtx_flag ) { float energy_ratio; +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + energy_ratio = powf( st_ivas->hSCE[0]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[0], 2.0f ); +#else energy_ratio = powf( st_ivas->hSCE[0]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence, 2.0f ); +#endif hSpatParamRendCom->numSimultaneousDirections = 1; azimuth[0] = (int16_t) roundf( hParamIsmDec->azimuth_values[0] ); diff --git 
a/lib_dec/ivas_stereo_mdct_stereo_dec.c b/lib_dec/ivas_stereo_mdct_stereo_dec.c index f41ffb495d..3fd4b3184f 100644 --- a/lib_dec/ivas_stereo_mdct_stereo_dec.c +++ b/lib_dec/ivas_stereo_mdct_stereo_dec.c @@ -523,7 +523,11 @@ void synchonize_channels_mdct_sid( sts[1]->L_frame = sts[0]->L_frame; sts[1]->cng_type = sts[0]->cng_type; sts[1]->bwidth = sts[0]->bwidth; +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + sts[0]->hFdCngDec->hFdCngCom->coherence[0] = sts[1]->hFdCngDec->hFdCngCom->coherence[0]; /* coherence is stored in sts[1] - see ivas_decision_matrix_dec() and FdCngDecodeMDCTStereoSID() */ +#else sts[0]->hFdCngDec->hFdCngCom->coherence = sts[1]->hFdCngDec->hFdCngCom->coherence; /* coherence is stored in sts[1] - see ivas_decision_matrix_dec() */ +#endif sts[0]->hFdCngDec->hFdCngCom->no_side_flag = sts[1]->hFdCngDec->hFdCngCom->no_side_flag; /* configure when there is a switching from DFT CNG to MDCT CNG */ diff --git a/lib_enc/fd_cng_enc.c b/lib_enc/fd_cng_enc.c index da13d3deb8..da49568c45 100644 --- a/lib_enc/fd_cng_enc.c +++ b/lib_enc/fd_cng_enc.c @@ -34,6 +34,8 @@ EVS Codec 3GPP TS26.443 Nov 04, 2021. 
Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0 ====================================================================================*/ +#include "cnst.h" +#include "ivas_cnst.h" #include #include #include "options.h" @@ -43,6 +45,9 @@ #include #include "rom_enc.h" #include "rom_com.h" +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG +#include "ivas_rom_com.h" +#endif #include "prot.h" #include "ivas_prot.h" #include "stat_enc.h" @@ -154,7 +159,14 @@ void initFdCngEnc( set_f( hFdCngEnc->msLogPeriodog, 0.0f, NPART ); set_f( hFdCngEnc->msLogNoiseEst, 0.0f, NPART ); +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + for ( int16_t i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + { + set_f( hFdCngEnc->mem_coherence[i], EPSILON, 4 ); + } +#else set_f( hFdCngEnc->mem_coherence, EPSILON, 4 ); +#endif return; } @@ -891,7 +903,14 @@ void stereoFdCngCoherence( if ( last_element_mode != IVAS_CPE_MDCT ) { +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + for ( int16_t i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + { + set_f( sts[0]->hFdCngEnc->mem_coherence[i], EPSILON, 4 ); + } +#else set_f( sts[0]->hFdCngEnc->mem_coherence, EPSILON, 4 ); +#endif } if ( sts[0]->core_brate == -1 || sts[1]->core_brate == -1 ) @@ -941,8 +960,65 @@ void stereoFdCngCoherence( pt_fftL = fft_buff[0]; pt_fftR = fft_buff[1]; +#ifndef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG mem = sts[0]->hFdCngEnc->mem_coherence; +#endif + + /* only estimate coherence in inactive frames (or in the first 50 frames to build an initial value) */ + if ( !( sts[0]->ini_frame <= 50 || ( sts[0]->vad_flag == 0 && sts[1]->vad_flag == 0 ) ) ) + { + return; + } +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + for ( i_subfr = 0; i_subfr < 2; i_subfr++ ) + { + int16_t band_len_cum; + + band_len_cum = 0; + i = MDCT_ST_DTX_FIRST_BAND_OFFSET; + + for ( int16_t b = 0; b < MDCT_ST_DTX_NUM_COHERENCE_BANDS; b++ ) + { + band_len_cum += mdct_stereo_dtx_coherence_bandlengths[b]; + cr = ci = eL = eR = EPSILON; + mem = 
sts[0]->hFdCngEnc->mem_coherence[b]; + + /* for last band, we need to make adjustments to fit with the FFT buffer layout */ + /* do the calculations for fftbin 0 (not anymore as it is skipped - we only calculate coherence above the 2nd bin) and L_FFT/2 outside the loop - imaginary part is always zero there, but not part of the buffer */ + if ( b == MDCT_ST_DTX_NUM_COHERENCE_BANDS - 1 ) + { + /* this calculates values for fft bin at L_FFT/2 - no imaginary value there, not even part of the array */ + cr += pt_fftL[L_FFT / 2] * pt_fftR[L_FFT / 2]; + eL += pt_fftL[L_FFT / 2] * pt_fftL[L_FFT / 2]; + eR += pt_fftR[L_FFT / 2] * pt_fftR[L_FFT / 2]; + + /* skip last fft bin (fs/2 Hz) bin in the loop later */ + --band_len_cum; + } + + for ( ; i < band_len_cum; i++ ) + { + cr += pt_fftL[i] * pt_fftR[i] + pt_fftL[L_FFT - i] * pt_fftR[L_FFT - i]; + ci += -pt_fftL[i] * pt_fftR[L_FFT - i] + pt_fftR[i] * pt_fftL[L_FFT - i]; + eL += pt_fftL[i] * pt_fftL[i] + pt_fftL[L_FFT - i] * pt_fftL[L_FFT - i]; + eR += pt_fftR[i] * pt_fftR[i] + pt_fftR[L_FFT - i] * pt_fftR[L_FFT - i]; + } + + mem[0] = 0.95f * mem[0] + 0.05f * cr; + mem[1] = 0.95f * mem[1] + 0.05f * ci; + mem[2] = 0.95f * mem[2] + 0.05f * eL; + mem[3] = 0.95f * mem[3] + 0.05f * eR; + + /* could be done outside the loop, as the second assignment overwrites the first, but this would mean a second loop over bands */ + sts[0]->hFdCngEnc->hFdCngCom->coherence[b] = sqrtf( ( mem[0] * mem[0] + mem[1] * mem[1] ) / ( mem[2] * mem[3] ) ); + } + + pt_fftL += L_FFT; + pt_fftR += L_FFT; + } + +#else for ( i_subfr = 0; i_subfr < 2; i_subfr++ ) { cr = ci = eL = eR = EPSILON; @@ -970,8 +1046,10 @@ void stereoFdCngCoherence( pt_fftL += L_FFT; pt_fftR += L_FFT; } - +#endif +#ifndef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG sts[0]->hFdCngEnc->hFdCngCom->coherence = sqrtf( ( mem[0] * mem[0] + mem[1] * mem[1] ) / ( mem[2] * mem[3] ) ); +#endif return; } @@ -1172,7 +1250,11 @@ void FdCngEncodeMDCTStereoSID( } /* quantize channel coherence */ +#ifdef 
NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + coh_idx = (int16_t) floor( sts[0]->hFdCngEnc->hFdCngCom->coherence[0] * 15.f + 0.5f ); +#else coh_idx = (int16_t) floor( sts[0]->hFdCngEnc->hFdCngCom->coherence * 15.f + 0.5f ); +#endif coh_idx = max( 0, min( coh_idx, 15 ) ); /* ---- Write SID bitstream ---- */ @@ -1206,8 +1288,18 @@ void FdCngEncodeMDCTStereoSID( push_indice( sts[ch]->hBstr, IND_ENERGY, gain_idx[ch], 7 ); } +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + /* write the four additional coherence values */ + for ( int16_t b = 1; b < MDCT_ST_DTX_NUM_COHERENCE_BANDS; b++ ) + { + coh_idx = (int16_t) floor( sts[0]->hFdCngEnc->hFdCngCom->coherence[b] * 15.f + 0.5f ); + coh_idx = max( 0, min( coh_idx, 15 ) ); + push_indice( sts[1]->hBstr, IND_ENERGY, coh_idx, 4 ); + } +#else /* pad with zeros to reach common SID frame size */ push_indice( sts[1]->hBstr, IND_ENERGY, 0, ( IVAS_SID_5k2 - 4400 ) / FRAMES_PER_SEC ); +#endif return; @@ -1348,8 +1440,13 @@ void FdCngEncodeDiracMDCTStereoSID( sts[ch]->hDtxEnc->last_CNG_L_frame = sts[ch]->L_frame; } +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + sts[0]->hFdCngEnc->hFdCngCom->coherence[0] = 0.0f; + sts[1]->hFdCngEnc->hFdCngCom->coherence[0] = 0.0f; +#else sts[0]->hFdCngEnc->hFdCngCom->coherence = 0.0f; sts[1]->hFdCngEnc->hFdCngCom->coherence = 0.0f; +#endif /* ---- Write SID bitstream ---- */ diff --git a/lib_enc/stat_enc.h b/lib_enc/stat_enc.h index 19a87b6a1d..4d6fe1134b 100755 --- a/lib_enc/stat_enc.h +++ b/lib_enc/stat_enc.h @@ -359,7 +359,11 @@ typedef struct fd_cng_enc_structure int16_t nFFTpartDec; int16_t partDec[NPART]; +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + float mem_coherence[MDCT_ST_DTX_NUM_COHERENCE_BANDS][4]; +#else float mem_coherence[4]; +#endif } FD_CNG_ENC, *HANDLE_FD_CNG_ENC; -- GitLab From dd07d21049744650e2fa10aaec632203aa420b19 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Tue, 21 Oct 2025 08:00:27 +0200 Subject: [PATCH 2/5] apply clang-format --- lib_dec/fd_cng_dec.c | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/fd_cng_dec.c b/lib_dec/fd_cng_dec.c index c34364f090..e6435632b1 100644 --- a/lib_dec/fd_cng_dec.c +++ b/lib_dec/fd_cng_dec.c @@ -424,7 +424,7 @@ void ApplyFdCng( /* set noise estimation inactive when we have bit errors, as no update with noise generated by corrupt frame (biterror) should be performed. */ if ( concealWholeFrame == 0 && ( timeDomainInput == NULL || - ( *timeDomainInput < FLT_MAX && *timeDomainInput > ( -FLT_MAX ) && + ( *timeDomainInput<FLT_MAX && *timeDomainInput>( -FLT_MAX ) && *( timeDomainInput + hFdCngCom->frameSize - 1 ) < FLT_MAX && *( timeDomainInput + hFdCngCom->frameSize - 1 ) > ( -FLT_MAX ) ) ) && ( ( ( ( st->element_mode != IVAS_CPE_TD && st->element_mode != IVAS_CPE_DFT && hFdCngDec->flag_dtx_mode ) || !st->VAD || ( st->ini_frame < 100 && st->is_ism_format ) ) && -- GitLab From 106b0e638b46b4ce34757d4635c848dfed0214c3 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Tue, 21 Oct 2025 08:07:28 +0200 Subject: [PATCH 3/5] fix windows build --- lib_dec/fd_cng_dec.c | 4 ++-- lib_dec/ivas_ism_metadata_dec.c | 6 +++--- lib_enc/fd_cng_enc.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib_dec/fd_cng_dec.c b/lib_dec/fd_cng_dec.c index e6435632b1..a34b9f36c6 100644 --- a/lib_dec/fd_cng_dec.c +++ b/lib_dec/fd_cng_dec.c @@ -424,7 +424,7 @@ void ApplyFdCng( /* set noise estimation inactive when we have bit errors, as no update with noise generated by corrupt frame (biterror) should be performed. 
*/ if ( concealWholeFrame == 0 && ( timeDomainInput == NULL || - ( *timeDomainInput<FLT_MAX && *timeDomainInput>( -FLT_MAX ) && + ( *timeDomainInput < FLT_MAX && *timeDomainInput > ( -FLT_MAX ) && *( timeDomainInput + hFdCngCom->frameSize - 1 ) < FLT_MAX && *( timeDomainInput + hFdCngCom->frameSize - 1 ) > ( -FLT_MAX ) ) ) && ( ( ( ( st->element_mode != IVAS_CPE_TD && st->element_mode != IVAS_CPE_DFT && hFdCngDec->flag_dtx_mode ) || !st->VAD || ( st->ini_frame < 100 && st->is_ism_format ) ) && @@ -2415,7 +2415,7 @@ void FdCngDecodeDiracMDCTStereoSID( lpc_from_spectrum( hFdCngCom, hFdCngCom->startBand, hFdCngCom->stopFFTbin, sts[ch]->preemph_fac ); } #ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG - for ( int16_t i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + for ( i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) { sts[0]->hFdCngDec->hFdCngCom->coherence[i] = 0.0f; sts[1]->hFdCngDec->hFdCngCom->coherence[i] = 0.0f; diff --git a/lib_dec/ivas_ism_metadata_dec.c b/lib_dec/ivas_ism_metadata_dec.c index d81b77de53..5f65826b5b 100644 --- a/lib_dec/ivas_ism_metadata_dec.c +++ b/lib_dec/ivas_ism_metadata_dec.c @@ -1058,7 +1058,7 @@ void ivas_ism_metadata_sid_dec( if ( ch == *sce_id_dtx ) { #ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG - for ( int16_t i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + for ( i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) { hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[i] = 1.0f; } @@ -1071,7 +1071,7 @@ void ivas_ism_metadata_sid_dec( idx = get_next_indice( st0, nBits_coh ); #ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[0] = (float) ( idx ) / (float) ( ( 1 << nBits_coh ) - 1 ); - for ( int16_t i = 1; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + for ( i = 1; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) { hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[i] = hSCE[ch]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[0]; } @@ -1084,7 +1084,7 @@ void ivas_ism_metadata_sid_dec( if ( ism_mode 
== ISM_MODE_PARAM ) { #ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG - for ( int16_t i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + for ( i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) { hSCE[*sce_id_dtx]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[i] = hSCE[!*sce_id_dtx]->hCoreCoder[0]->hFdCngDec->hFdCngCom->coherence[i]; } diff --git a/lib_enc/fd_cng_enc.c b/lib_enc/fd_cng_enc.c index da49568c45..84282281a4 100644 --- a/lib_enc/fd_cng_enc.c +++ b/lib_enc/fd_cng_enc.c @@ -904,7 +904,7 @@ void stereoFdCngCoherence( if ( last_element_mode != IVAS_CPE_MDCT ) { #ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG - for ( int16_t i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) + for ( i = 0; i < MDCT_ST_DTX_NUM_COHERENCE_BANDS; i++ ) { set_f( sts[0]->hFdCngEnc->mem_coherence[i], EPSILON, 4 ); } -- GitLab From 4b06cb88b0f5870e36a537a69a7b1de12ac3741f Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Tue, 21 Oct 2025 08:09:40 +0200 Subject: [PATCH 4/5] apply clang-format AGAIN --- lib_dec/fd_cng_dec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/fd_cng_dec.c b/lib_dec/fd_cng_dec.c index a34b9f36c6..1a6d4abb50 100644 --- a/lib_dec/fd_cng_dec.c +++ b/lib_dec/fd_cng_dec.c @@ -424,7 +424,7 @@ void ApplyFdCng( /* set noise estimation inactive when we have bit errors, as no update with noise generated by corrupt frame (biterror) should be performed. 
*/ if ( concealWholeFrame == 0 && ( timeDomainInput == NULL || - ( *timeDomainInput < FLT_MAX && *timeDomainInput > ( -FLT_MAX ) && + ( *timeDomainInput<FLT_MAX && *timeDomainInput>( -FLT_MAX ) && *( timeDomainInput + hFdCngCom->frameSize - 1 ) < FLT_MAX && *( timeDomainInput + hFdCngCom->frameSize - 1 ) > ( -FLT_MAX ) ) ) && ( ( ( ( st->element_mode != IVAS_CPE_TD && st->element_mode != IVAS_CPE_DFT && hFdCngDec->flag_dtx_mode ) || !st->VAD || ( st->ini_frame < 100 && st->is_ism_format ) ) && -- GitLab From 3dfd25c664b1594ef6893020cdc28b714cfbf76c Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Tue, 21 Oct 2025 15:34:26 +0200 Subject: [PATCH 5/5] remove comment and move variable declaration to block start --- lib_dec/fd_cng_dec.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib_dec/fd_cng_dec.c b/lib_dec/fd_cng_dec.c index 1a6d4abb50..adca10685d 100644 --- a/lib_dec/fd_cng_dec.c +++ b/lib_dec/fd_cng_dec.c @@ -2235,7 +2235,9 @@ void FdCngDecodeMDCTStereoSID( int16_t is_out_ms; float *invTrfMatrix; float tmpRAM[FDCNG_VQ_MAX_LEN][FDCNG_VQ_DCT_MAXTRUNC]; - +#ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG + int32_t tmp; +#endif invTrfMatrix = (float *) tmpRAM; create_IDCT_N_Matrix( invTrfMatrix, FDCNG_VQ_MAX_LEN, FDCNG_VQ_DCT_MAXTRUNC, sizeof( tmpRAM ) / ( sizeof( float ) ) ); @@ -2286,9 +2288,6 @@ void FdCngDecodeMDCTStereoSID( } #ifdef NONBE_MDCT_ST_DTX_FIX_SUBOPT_SPATIAL_CNG - /* TODO: temporary hack, need to decide what to do with core-coder bitrate */ - int32_t tmp; - tmp = sts[1]->total_brate; sts[1]->total_brate = sts[1]->total_brate + 16 * FRAMES_PER_SEC; /* read the four additional coherence values */ -- GitLab