From baacf96b3499ae6f427fef3ea2b28fdc85de9b58 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh
Date: Tue, 22 Oct 2024 17:00:07 +0530
Subject: [PATCH] Fix for 3GPP issue 942: Decoder crash for MASA1 rateswitching to EXT with FER in generate_masking_noise_mdct_fx()

---
 lib_com/cnst.h                   |   1 +
 lib_com/prot_fx.h                |   6 ++
 lib_dec/acelp_core_dec_ivas_fx.c |   8 +-
 lib_dec/dec_tcx.c                |   2 +-
 lib_dec/fd_cng_dec_fx.c          | 123 +++++++++++++++++++++++++++++++
 lib_dec/ivas_tcx_core_dec.c      |   9 ++-
 6 files changed, 145 insertions(+), 4 deletions(-)

diff --git a/lib_com/cnst.h b/lib_com/cnst.h
index 5619e905d..6bcb3ea4b 100644
--- a/lib_com/cnst.h
+++ b/lib_com/cnst.h
@@ -1481,6 +1481,7 @@ enum
 #define LOW_RATE_HQ_CORE 1 /* Signal use of Low Rate MDCT core */
 #define LOW_RATE_HQ_CORE_TRAN 2 /* Signal use of Low Rate MDCT core Tran SWB */
 #define NORM_MDCT_FACTOR L_FRAME8k /* Normalize Low Rate MDCT coefficients to this frame size */
+#define SQRT_NORM_MDCT_FACTOR_Q27 (1697734891) /* sqrt( NORM_MDCT_FACTOR ) in Q27: 1697734891 / 2^27 ~= 12.649 ~= sqrt( 160 ); assumes L_FRAME8k is 160 - confirm */
 #define BANDS_MAX ( 4 * 8 )
 #define MAX_GQLEVS 32 /* Max fine gain levels */
 #define BITS_DE_CMODE 1
diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h
index 6befe335c..9c2e272e4 100644
--- a/lib_com/prot_fx.h
+++ b/lib_com/prot_fx.h
@@ -7486,6 +7486,12 @@ void generate_masking_noise_mdct_fx( Word32 *mdctBuffer, /* i/o: time-domain s
                                      ,
                                      Word16 L_frame );
 
+#ifdef IVAS_FLOAT_FIXED
+void generate_masking_noise_mdct_ivas_fx( Word32 *mdctBuffer,   /* i/o: signal buffer the comfort noise is added to (inherited comment says time-domain; presumably MDCT bins - confirm) */
+                                          Word16 *mdctBuffer_e, /* i/o: exponent of mdctBuffer */
+                                          HANDLE_FD_CNG_COM st  /* i/o: FD_CNG structure containing all buffers and variables */ );
+#endif
+
 // init_dec_fx.c
 ivas_error init_decoder_fx(
     Decoder_State *st_fx, /* o: Decoder static variables structure */
diff --git a/lib_dec/acelp_core_dec_ivas_fx.c b/lib_dec/acelp_core_dec_ivas_fx.c
index a16878a55..679120dbb 100644
--- a/lib_dec/acelp_core_dec_ivas_fx.c
+++ b/lib_dec/acelp_core_dec_ivas_fx.c
@@ -1613,7 +1613,13 @@ ivas_error acelp_core_dec_ivas_fx(
                     {
                         set16_fx( st->hFdCngDec->hFdCngCom->olapBufferSynth2, 0, st->hFdCngDec->hFdCngCom->fftlen );
                     }
-                    generate_masking_noise_fx( psyn_fx, st->Q_syn, st->hFdCngDec->hFdCngCom, st->hFdCngDec->hFdCngCom->frameSize, 0 /*, 0, 0, st->element_mode, hStereoCng, nchan_out*/ );
+                    Word32 psyn_32_fx[L_FRAME16k];
+                    Word16 exp;
+                    Copy_Scale_sig_16_32_no_sat( psyn_fx, psyn_32_fx, st->hFdCngDec->hFdCngCom->frameSize, sub( Q6, st->Q_syn ) ); // Q6
+                    Copy_Scale_sig_16_32_no_sat( st->hFdCngDec->hFdCngCom->olapBufferSynth2, st->hFdCngDec->hFdCngCom->olapBufferSynth2_fx, shl( st->hFdCngDec->hFdCngCom->frameSize, 1 ), Q15 ); // Q15
+                    generate_masking_noise_ivas_fx( psyn_32_fx, &exp, st->hFdCngDec->hFdCngCom, st->hFdCngDec->hFdCngCom->frameSize, 0, 0, 0, st->element_mode, hStereoCng, nchan_out );
+                    Copy_Scale_sig_32_16( psyn_32_fx, psyn_fx, st->hFdCngDec->hFdCngCom->frameSize, sub( st->Q_syn, exp ) ); // Q = st->Q_syn
+                    Copy_Scale_sig_32_16( st->hFdCngDec->hFdCngCom->olapBufferSynth2_fx, st->hFdCngDec->hFdCngCom->olapBufferSynth2, shl( st->hFdCngDec->hFdCngCom->frameSize, 1 ), -Q15 ); // Q0
                 }
             }
         }
diff --git a/lib_dec/dec_tcx.c b/lib_dec/dec_tcx.c
index b952ba542..79efe8356 100644
--- a/lib_dec/dec_tcx.c
+++ b/lib_dec/dec_tcx.c
@@ -2088,7 +2088,7 @@ void decoder_tcx_imdct_fx(
     /* Generate additional comfort noise to mask potential coding artefacts */
     IF( NE_16( st->flag_cna, 0 ) && NE_16( st->element_mode, IVAS_CPE_TD ) && NE_16( st->element_mode, IVAS_CPE_DFT ) && EQ_16( st->cna_dirac_flag, 0 ) )
     {
-        generate_masking_noise_mdct_fx( x_fx, &x_e, st->hFdCngDec->hFdCngCom, L_frame );
+        generate_masking_noise_mdct_ivas_fx( x_fx, &x_e, st->hFdCngDec->hFdCngCom );
         FOR( Word16 ind = 0; ind < L_frame; ind++ )
         {
             x_fx[ind] = L_shr( x_fx[ind], sub( 31, add( x_e, q_x ) ) );
diff --git a/lib_dec/fd_cng_dec_fx.c b/lib_dec/fd_cng_dec_fx.c
index 2bfaa4b02..b6ba82895 100644
--- a/lib_dec/fd_cng_dec_fx.c
+++ b/lib_dec/fd_cng_dec_fx.c
@@ -5050,6 +5050,129 @@ void generate_masking_noise_mdct_fx(
         }
     }
 }
+/* Adds random comfort noise, scaled per bin by hFdCngCom->cngNoiseLevel, to mdctBuffer[0 .. stopFFTbin-1]; when likelihood_noisy_speech <= 0 only rand_gauss_fx is called and mdctBuffer is left untouched. */
+void generate_masking_noise_mdct_ivas_fx(
+    Word32 *mdctBuffer,         /* i/o: signal buffer the comfort noise is added to (inherited comment says time-domain; presumably MDCT bins - confirm) */
+    Word16 *mdctBuffer_e,       /* i/o: exponent of mdctBuffer; reduced by 1 when noise is added */
+    HANDLE_FD_CNG_COM hFdCngCom /* i/o: FD_CNG structure containing all buffers and variables */ )
+{
+    Word16 i, sq, cnt;
+    Word16 cngNoiseLevelExp;
+    Word32 scale, temp;
+    Word32 sqrtNoiseLevel;
+    Word32 maskingNoise[2 * L_FRAME16k];
+    Word32 *pMaskingNoise;
+    Word32 *cngNoiseLevel;
+    Word16 *seed;
+
+    // PMTE(); /*IVAS CODE need to be added */
+    /* pointer initializations */
+    cngNoiseLevel = hFdCngCom->cngNoiseLevel;
+    seed = &( hFdCngCom->seed );
+
+    /* Compute additional CN level */
+    cngNoiseLevelExp = hFdCngCom->cngNoiseLevelExp;
+    move16();
+
+    scale = ONE_IN_Q30;
+    move32();
+
+    cnt = sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand );
+
+    /* noise is only added when likelihood_noisy_speech is positive (inherited note: skip noise generating if level is very low, to avoid problems with possibly running into denormals) */
+    IF( hFdCngCom->likelihood_noisy_speech > 0 )
+    {
+        FOR( i = 0; i < SIZE_SCALE_TABLE_CN; i++ )
+        {
+            test();
+            test();
+            IF( EQ_16( hFdCngCom->CngBandwidth, scaleTable_cn_only[i].bwmode ) &&
+                GE_32( hFdCngCom->CngBitrate, scaleTable_cn_only[i].bitrateFrom ) &&
+                LE_32( hFdCngCom->CngBitrate, scaleTable_cn_only[i].bitrateTo ) )
+            {
+                BREAK;
+            }
+        }
+
+        /* Exclude clean speech */
+        scale = L_mult( scaleTable_cn_only[i].scale, hFdCngCom->likelihood_noisy_speech ); // Q30 (14 + 15 + 1); NOTE(review): i equals SIZE_SCALE_TABLE_CN here if no table entry matched - confirm a match is guaranteed
+
+        /*
+          Draw one random value per bin with rand_gauss_fx (Q15) and scale it by
+          the square root of ( cngNoiseLevel * scale * 0.5 ) of that bin
+        */
+        IF( hFdCngCom->startBand == 0 )
+        {
+            /* *cngNoiseLevel * scale * 0.5 */
+            temp = Mpy_32_32( *cngNoiseLevel, scale ); // exp = cngNoiseLevelExp (cngNoiseLevelExp + Q30(scale) + 1(0.5f) - 31)
+            sq = cngNoiseLevelExp;
+            move16();
+
+            sqrtNoiseLevel = Sqrt32( temp, &sq );
+
+            rand_gauss_fx( &temp, seed, Q15 ); // Q15
+
+            maskingNoise[0] = L_shl( Mpy_32_32( temp, sqrtNoiseLevel ), sq ); // Q15
+            move32();
+
+            pMaskingNoise = &maskingNoise[1];
+            cngNoiseLevel++;
+            cnt = sub( cnt, 1 );
+        }
+        ELSE
+        {
+            set32_fx( maskingNoise, 0, hFdCngCom->startBand ); // no noise for the entries below startBand
+            pMaskingNoise = maskingNoise + hFdCngCom->startBand;
+        }
+
+        FOR( i = 0; i < cnt; i++ )
+        {
+            /* MDCT bins */
+            /* *cngNoiseLevel * scale * 0.5 */
+            temp = Mpy_32_32( *cngNoiseLevel, scale ); // exp = cngNoiseLevelExp (cngNoiseLevelExp + Q30(scale) + 1(0.5f) - 31)
+            sq = cngNoiseLevelExp;
+            move16();
+
+            sqrtNoiseLevel = Sqrt32( temp, &sq );
+
+            rand_gauss_fx( &temp, seed, Q15 ); // Q15
+
+            *pMaskingNoise = L_shl( Mpy_32_32( temp, sqrtNoiseLevel ), sq ); // Q15
+            move32();
+
+            pMaskingNoise++;
+            cngNoiseLevel++;
+        }
+
+        /*re-normalization of energy level: M/sqrt(2)*/
+        v_multc_fixed( maskingNoise, SQRT_NORM_MDCT_FACTOR_Q27, maskingNoise, hFdCngCom->stopFFTbin ); // Q11 (Q15 + Q27 - 31)
+
+        scale_sig32( maskingNoise, hFdCngCom->stopFFTbin, sub( 20, *mdctBuffer_e ) ); // Q11 is exp 20; shifted to exp = *mdctBuffer_e
+
+        /* Add some comfort noise on top of decoded signal */
+        v_add_fixed( maskingNoise, mdctBuffer, mdctBuffer, hFdCngCom->stopFFTbin, 1 ); // only the first stopFFTbin entries of mdctBuffer are written
+        *mdctBuffer_e = sub( *mdctBuffer_e, 1 ); // NOTE(review): tied to the last argument (1) of v_add_fixed above - confirm the direction of this exponent change against v_add_fixed
+        move16();
+    }
+    ELSE
+    {
+        /* very low level case - just update random seeds */
+        IF( hFdCngCom->startBand == 0 )
+        {
+            rand_gauss_fx( &maskingNoise[0], seed, Q15 ); // Q15
+            cngNoiseLevel++; // not read again on this path
+            cnt = sub( cnt, 1 );
+        }
+
+        FOR( i = 0; i < cnt; i++ )
+        {
+            rand_gauss_fx( &maskingNoise[0], seed, Q15 ); // Q15; maskingNoise[0] is overwritten each pass and never read
+            move32();
+        }
+    }
+
+    return;
+}
 #endif
 
 #ifdef IVAS_CODE_CNG
diff --git a/lib_dec/ivas_tcx_core_dec.c b/lib_dec/ivas_tcx_core_dec.c
index 05734c278..4ac7c32a8 100644
--- a/lib_dec/ivas_tcx_core_dec.c
+++ b/lib_dec/ivas_tcx_core_dec.c
@@ -996,8 +996,13 @@ void stereo_tcx_core_dec_fx(
         }
         ELSE IF( NE_16( st->element_mode, IVAS_CPE_DFT ) )
         {
-            // generate_masking_noise( signal_out, st->hFdCngDec->hFdCngCom, st->hFdCngDec->hFdCngCom->frameSize, 0, 0, 0, st->element_mode, hStereoCng, nchan_out );
-            generate_masking_noise_fx( signal_out_fx, 0, st->hFdCngDec->hFdCngCom, st->hFdCngDec->hFdCngCom->frameSize, 0 );
+            Word32 signal_out_32_fx[L_FRAME48k];
+            Word16 exp;
+            Copy_Scale_sig_16_32_no_sat( signal_out_fx, signal_out_32_fx, st->hFdCngDec->hFdCngCom->frameSize, Q6 ); // Q6 (assuming signal_out_fx is Q0, as the replaced call passed 0 - confirm)
+            Copy_Scale_sig_16_32_no_sat( st->hFdCngDec->hFdCngCom->olapBufferSynth2, st->hFdCngDec->hFdCngCom->olapBufferSynth2_fx, shl( st->hFdCngDec->hFdCngCom->frameSize, 1 ), Q15 ); // Q15
+            generate_masking_noise_ivas_fx( signal_out_32_fx, &exp, st->hFdCngDec->hFdCngCom, st->hFdCngDec->hFdCngCom->frameSize, 0, 0, 0, st->element_mode, hStereoCng, nchan_out );
+            Copy_Scale_sig_32_16( signal_out_32_fx, signal_out_fx, st->hFdCngDec->hFdCngCom->frameSize, negate( exp ) ); // Q0
+            Copy_Scale_sig_32_16( st->hFdCngDec->hFdCngCom->olapBufferSynth2_fx, st->hFdCngDec->hFdCngCom->olapBufferSynth2, shl( st->hFdCngDec->hFdCngCom->frameSize, 1 ), -Q15 ); // Q0
         }
     }
 
-- 
GitLab