fix wrong noise shapes by avoiding double whitening (d5ac7778) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_com/ivas_cnst.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -807,6 +807,11 @@ typedef enum {
		TCX10_IN_0_TCX20_IN_1,
		TCX20_IN_0_TCX10_IN_1,
		} TONALMDCTCONC_NOISE_GEN_MODE;

		/* Direction selector for TonalMdctConceal_whiten_noise_shape() */
		typedef enum
		{
		    /* first lost frame: derive SNS scale factors from the noise shape and shape it with them */
		    ON_FIRST_LOST_FRAME = 0,
		    /* first good frame after a loss: shape with the stored background scale factors to undo the whitening */
		    ON_FIRST_GOOD_FRAME = 1
		} TONALMDCTCONC_NOISE_SHAPE_WHITENING_MODE;
		#endif

lib_com/ivas_prot.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -5454,6 +5454,12 @@ void TonalMdctConceal_create_concealment_noise(
		const int16_t crossfade_gain,
		const TONALMDCTCONC_NOISE_GEN_MODE noise_gen_mode
		);

		void TonalMdctConceal_whiten_noise_shape(
		Decoder_State *st,
		const int16_t L_frame,
		const TONALMDCTCONC_NOISE_SHAPE_WHITENING_MODE
		);
		#endif

		float rand_triangular_signed(

lib_dec/fd_cng_dec.c

+19 −38

Original line number	Diff line number	Diff line
		@@ -496,48 +496,28 @@ void ApplyFdCng(
		}
		}

		#ifndef MDCT_STEREO_PLC_FADE_2_BG_NOISE
		if ( ( concealWholeFrame == 1 ) && ( st->nbLostCmpt == 1 ) && sum_f( cngNoiseLevel + hFdCngCom->startBand, hFdCngCom->stopFFTbin - hFdCngCom->startBand ) > 0.01f )
		{
		#ifndef MDCT_STEREO_PLC_FADE_2_BG_NOISE
		/* update lsf cng estimate for concealment. Do that during concealment, in order to avoid additional clean-channel complexity */
		lpc_from_spectrum( hFdCngCom, hFdCngCom->startBand, hFdCngCom->stopFFTbin, 0 );
		#else
		if ( st->element_mode == IVAS_CPE_MDCT && st->core != ACELP_CORE )
		if ( ( concealWholeFrame == 1 ) && ( st->nbLostCmpt == 1 ) )
		{
		float scf[SNS_NPTS];
		float scf_int[FDNS_NPTS];
		float whitenend_noise_shape[L_FRAME16k];
		int16_t inc, start_idx, stop_idx;
		float *noiseLevelPtr;

		wmops_sub_start( "get scfs for bg" );

		inc = ( st->core > TCX_20 ) ? 2 : 1;
		start_idx = hFdCngCom->startBand / inc;
		stop_idx = L_frame / inc;
		noiseLevelPtr = cngNoiseLevel;
		/* update lsf cng estimate for concealment. Do that during concealment, in order to avoid additional clean-channel complexity */

		set_zero( whitenend_noise_shape, start_idx );
		for ( j = start_idx; j < stop_idx; j++, noiseLevelPtr += inc )
		/* always set psychParameters ... */
		if ( st->hTonalMDCTConc != NULL )
		{
		whitenend_noise_shape[j] = *noiseLevelPtr;
		st->hTonalMDCTConc->psychParams = ( st->core == TCX_20_CORE ) ? &st->hTonalMDCTConc->psychParamsTCX20 : &st->hTonalMDCTConc->psychParamsTCX10;
		}
		if ( st->core == TCX_20_CORE )

		/* ... but do actual computations only if sufficient energy in noise shape */
		if ( sum_f( cngNoiseLevel + hFdCngCom->startBand, hFdCngCom->stopFFTbin - hFdCngCom->startBand ) > 0.01f )
		{
		st->hTonalMDCTConc->psychParams = &st->hTonalMDCTConc->psychParamsTCX20;
		}
		else
		if ( st->element_mode == IVAS_CPE_MDCT && st->core != ACELP_CORE )
		{
		st->hTonalMDCTConc->psychParams = &st->hTonalMDCTConc->psychParamsTCX10;
		}

		sns_compute_scf( whitenend_noise_shape, st->hTonalMDCTConc->psychParams, L_frame, scf );
		sns_interpolate_scalefactors( scf_int, scf, ENC );
		sns_interpolate_scalefactors( st->hTonalMDCTConc->scaleFactorsBackground, scf, DEC );
		sns_shape_spectrum( whitenend_noise_shape, st->hTonalMDCTConc->psychParams, scf_int, L_frame );

		mvr2r( whitenend_noise_shape + start_idx, cngNoiseLevel, stop_idx - start_idx );
		wmops_sub_end();
		TonalMdctConceal_whiten_noise_shape( st, L_frame, ON_FIRST_LOST_FRAME );
		}
		else if ( st->element_mode != IVAS_CPE_MDCT )
		{
		@@ -546,6 +526,7 @@ void ApplyFdCng(
		mvr2r( lsp_cng, st->lspold_cng, M );
		lsp2lsf( lsp_cng, st->lsf_cng, M, sr_core );
		}
		}
		#endif

		#ifndef MDCT_STEREO_PLC_FADE_2_BG_NOISE

lib_dec/ivas_core_dec.c

+15 −0

Original line number	Diff line number	Diff line
		@@ -428,6 +428,21 @@ ivas_error ivas_core_dec(
		{
		updateBuffersForDmxMdctStereo( hCPE, output_frame, output, synth );
		}

		#ifdef MDCT_STEREO_PLC_FADE_2_BG_NOISE
		if ( sts[0]->bfi == 0 && sts[0]->prev_bfi == 1 )
		{
		/* On first good frame after frameloss undo the whitening of the bg noise shape */
		for ( n = 0; n < n_channels; ++n )
		{
		if ( sts[n]->last_core != ACELP_CORE )
		{
		TonalMdctConceal_whiten_noise_shape( sts[n], L_FRAME16k, ON_FIRST_GOOD_FRAME );
		}
		}
		}
		#endif

		}

		/---------------------------------------------------------------------

lib_dec/tonalMDCTconcealment.c

+50 −0

Original line number	Diff line number	Diff line
		@@ -1119,4 +1119,54 @@ void TonalMdctConceal_create_concealment_noise(

		return;
		}

		/*-------------------------------------------------------------------*
		 * TonalMdctConceal_whiten_noise_shape()
		 *
		 * Shapes the FD-CNG noise level with SNS scale factors, in place.
		 *
		 * ON_FIRST_LOST_FRAME: scale factors are computed from the current
		 *   noise shape; the ENC-interpolated set is used for the shaping and
		 *   the DEC-interpolated set is stored in
		 *   st->hTonalMDCTConc->scaleFactorsBackground.
		 * otherwise (ON_FIRST_GOOD_FRAME): the stored background scale factors
		 *   are used for the shaping (the caller uses this to undo the
		 *   whitening after a frame loss).
		 *
		 * st             i/o: decoder state; reads hTonalMDCTConc->psychParams,
		 *                     core / last_core and hFdCngDec->hFdCngCom,
		 *                     writes cngNoiseLevel and (on first lost frame)
		 *                     scaleFactorsBackground
		 * L_frame        i  : frame length passed to the SNS routines; must not
		 *                     exceed L_FRAME16k (size of the local work buffer)
		 * whitening_mode i  : selects which scale factors are applied
		 *-------------------------------------------------------------------*/
		void TonalMdctConceal_whiten_noise_shape(
		    Decoder_State *st,
		    const int16_t L_frame,
		    const TONALMDCTCONC_NOISE_SHAPE_WHITENING_MODE whitening_mode )
		{
		    int16_t inc, start_idx, stop_idx, j;
		    float scf[SNS_NPTS];
		    float scfs_int[FDNS_NPTS];
		    float whitenend_noise_shape[L_FRAME16k];
		    float *noiseLevelPtr, *scfs_bg, *scfs_for_shaping; /* all three are pointers into float arrays */
		    PsychoacousticParameters **psychParams;
		    HANDLE_FD_CNG_COM hFdCngCom;

		    wmops_sub_start( "apply_sns_on_noise_shape" );

		    scfs_bg = &st->hTonalMDCTConc->scaleFactorsBackground[0];
		    psychParams = &st->hTonalMDCTConc->psychParams;
		    hFdCngCom = st->hFdCngDec->hFdCngCom;

		    /* on the first lost frame the current core decides the stride, on the first good frame the previous one;
		       cores above TCX_20_CORE read every second noise level value */
		    inc = ( ( whitening_mode == ON_FIRST_LOST_FRAME ? st->core : st->last_core ) > TCX_20_CORE ) ? 2 : 1;
		    start_idx = hFdCngCom->startBand / inc;
		    stop_idx = L_frame / inc;
		    noiseLevelPtr = hFdCngCom->cngNoiseLevel;

		    /* build the work buffer: zeros below start_idx, then the noise level read with stride inc */
		    set_zero( whitenend_noise_shape, start_idx );
		    for ( j = start_idx; j < stop_idx; j++, noiseLevelPtr += inc )
		    {
		        whitenend_noise_shape[j] = *noiseLevelPtr;
		    }

		    if ( whitening_mode == ON_FIRST_LOST_FRAME )
		    {
		        /* derive scale factors from the noise shape and keep the DEC-interpolated set for later */
		        sns_compute_scf( whitenend_noise_shape, *psychParams, L_frame, scf );
		        sns_interpolate_scalefactors( scfs_int, scf, ENC );
		        sns_interpolate_scalefactors( scfs_bg, scf, DEC );
		        scfs_for_shaping = &scfs_int[0];
		    }
		    else
		    {
		        /* ON_FIRST_GOOD_FRAME: reuse the background scale factors stored on the first lost frame;
		           a plain else guarantees scfs_for_shaping is never left uninitialized */
		        scfs_for_shaping = &scfs_bg[0];
		    }

		    sns_shape_spectrum( whitenend_noise_shape, *psychParams, scfs_for_shaping, L_frame );

		    /* write the shaped values back, dropping the zero-padded part below start_idx */
		    mvr2r( whitenend_noise_shape + start_idx, hFdCngCom->cngNoiseLevel, stop_idx - start_idx );

		    wmops_sub_end();

		    return;
		}

		#endif