Merge branch 'fxd_changes_related_to_float_updates_in_MR_797_788' into 'main' (e0a989ab) · Commits · SA4 / Audio / IVAS BASOP

lib_com/options.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -196,6 +196,8 @@
		#define FIX_953_WRONG_ENERGY_RATIO_MASA_EXT /* Nok: Fix 953 wrong energy ratio value after shift and cast to Word8 */
		#define FIX_982_WRONG_DECODED_ENERGY_RATIO /* Nokia: Fix 982 wrong energy in EXT mode and in second direction when present */
		#define FIX_999_WRONG_ISM_EXTENDED_METADATA /* VA: fix 999: fix ISM extended metadata decoding */
		#define NONBE_FIX_1205_TD_STEREO_MOD_CT /* VA: fix mismatch of coder_type (mod_ct) btw. TD stereo encoder and decoder */
		#define NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING /* FhG: fixes for decoder-side noise level estimation in MDCT-Stereo to prevent noise bursts in stereo switching */
		/* ################## End DEVELOPMENT switches ######################### */

		/* clang-format on */

lib_dec/ivas_stereo_mdct_core_dec_fx.c

+39 −6

Original line number	Diff line number	Diff line
		@@ -988,7 +988,7 @@ static void run_min_stats_fx(
		{
		Word16 ch, will_estimate_noise_on_channel[CPE_CHANNELS], save_VAD[CPE_CHANNELS];
		Word32 power_spec[L_FRAME16k];
		Word16 power_spec_16[L_FRAME16k], power_spec_e = 0;
		Word16 power_spec_e = 0;
		move16();
		Word32 *spec_in;
		Word16 spec_e;
		@@ -1044,29 +1044,62 @@ static void run_min_stats_fx(
		IF( ( EQ_16( will_estimate_noise_on_channel[0], will_estimate_noise_on_channel[1] ) ) || EQ_16( ch, 0 ) )
		{
		Word16 tmp16 = getScaleFactor32( spec_in, L_FRAME16k );

		#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING
		Word32 power_spec_scale_fac;

		/* calculate power spectrum from MDCT coefficients and estimated MDST coeffs */
		power_spec_scale_fac = 20792; // 1.f / ( L_FRAME16k * L_FRAME16k ) in Q31
		move32();
		power_spec[0] = Mpy_32_32( W_extract_h( W_shl( W_mult_32_32( spec_in[0], spec_in[0] ), sub( tmp16, 4 ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
		move32();
		power_spec[L_FRAME16k - 1] = Mpy_32_32( W_extract_h( W_shl( W_mult_32_32( spec_in[L_FRAME16k - 1], spec_in[L_FRAME16k - 1] ), sub( tmp16, 4 ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
		move32();
		#else
		/* calculate power spectrum from MDCT coefficients and estimated MDST coeffs */
		power_spec[0] = W_extract_h( W_shl( W_mult_32_32( spec_in[0], spec_in[0] ), sub( tmp16, 4 ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
		power_spec[L_FRAME16k - 1] = W_extract_h( W_shl( W_mult_32_32( spec_in[L_FRAME16k - 1], spec_in[L_FRAME16k - 1] ), sub( tmp16, 4 ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */
		#endif
		FOR( Word16 i = 1; i < L_FRAME16k - 1; i++ )
		{
		Word32 mdst;
		mdst = L_sub( spec_in[i + 1], spec_in[i - 1] ); /* Q31 - x_e */

		#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING
		power_spec[i] = Mpy_32_32( L_add( W_extract_h( W_shl( W_mult_32_32( spec_in[i], spec_in[i] ), sub( tmp16, 4 ) ) ), W_extract_h( W_shl( W_mult_32_32( mdst, mdst ), sub( tmp16, 4 ) ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31*/
		#else
		power_spec[i] = L_add( W_extract_h( W_shl( W_mult_32_32( spec_in[i], spec_in[i] ), sub( tmp16, 4 ) ) ), W_extract_h( W_shl( W_mult_32_32( mdst, mdst ), sub( tmp16, 4 ) ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31*/
		#endif
		move32();
		}
		power_spec_e = sub( add( 4, shl( spec_e, 1 ) ), tmp16 );
		}

		Copy_Scale_sig32_16( power_spec, power_spec_16, L_FRAME16k, 0 ); /* exp(power_spec_e) */

		#ifndef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING
		noisy_speech_detection_fx( st->hFdCngDec, st->VAD && st->m_frame_type == ACTIVE_FRAME, power_spec_16, sub( 15, power_spec_e ) );

		st->hFdCngDec->hFdCngCom->likelihood_noisy_speech = add( mult_r( 32440 /* 0.99 in Q15 */, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech ), mult_r( st->hFdCngDec->hFdCngCom->flag_noisy_speech, 328 /* 0.01 in Q15 */ ) ); /* Q15 */
		move16();
		st->lp_noise = st->hFdCngDec->lp_noise; /* Q9.23 */
		move32();
		#endif
		}

		#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING
		IF( EQ_16( st->core, TCX_20_CORE ) )
		{
		Word16 x_fx_16[L_FRAME16k];
		Copy_Scale_sig32_16( x[ch][0], x_fx_16, L_FRAME16k, 0 ); /* exp(x_e) */

		test();
		noisy_speech_detection_fx( st->hFdCngDec, save_VAD[ch] && EQ_16( st->m_frame_type, ACTIVE_FRAME ), x_fx_16, sub( Q15, x_e[ch][0] ) );
		st->hFdCngDec->hFdCngCom->likelihood_noisy_speech = add( mult_r( 32440 /* 0.99 in Q15 */, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech ), mult_r( st->hFdCngDec->hFdCngCom->flag_noisy_speech, 328 /* 0.01 in Q15 */ ) ); /* Q15 */
		move16();
		st->lp_noise = st->hFdCngDec->lp_noise; /* Q9.23 */
		move32();
		}
		#endif

		test();
		test();
		IF( will_estimate_noise_on_channel[0] || will_estimate_noise_on_channel[1] || st->bfi )

lib_enc/enc_uv_fx.c

+3 −2

Original line number	Diff line number	Diff line
		@@ -282,6 +282,7 @@ void encod_unvoiced_ivas_fx(
		{
		Word16 xn_fx[L_SUBFR]; /* Target vector for pitch search */
		Word16 h1_fx[L_SUBFR]; /* Impulse response vector */
		Word16 h2_fx[L_SUBFR]; /* Impulse response vector */
		Word16 code_fx[L_SUBFR]; /* Fixed codebook excitation */
		Word16 y2_fx[L_SUBFR]; /* Filtered algebraic excitation */
		Word16 *pt_pitch_fx; /* pointer to floating pitch buffer */
		@@ -348,7 +349,7 @@ void encod_unvoiced_ivas_fx(
		find_targets_fx( speech_fx, hLPDmem->mem_syn, i_subfr, &hLPDmem->mem_w0, p_Aq_fx,
		res_fx, L_SUBFR, p_Aw_fx, st_fx->preemph_fac, xn_fx, cn_fx, h1_fx );

		/*Copy_Scale_sig(h1_fx, h2_fx, L_SUBFR, -2);*/
		Copy_Scale_sig( h1_fx, h2_fx, L_SUBFR, -2 );
		Scale_sig( h1_fx, L_SUBFR, add( 1, shift ) ); /* set h1[] in Q14 with scaling for convolution */

		/* scaling of xn[] to limit dynamic at 12 bits */
		@@ -390,7 +391,7 @@ void encod_unvoiced_ivas_fx(
		// E_ACELP_innovative_codebook_fx( exc_fx, *pt_pitch_fx, 0, 1, gain_pit_fx, hLPDmem->tilt_code, acelp_cfg, i_subfr, p_Aq_fx, h1_fx, xn_fx, cn_fx, y1, y2_fx, (Word8) st_fx->acelp_autocorr, &prm, code_fx, shift, st_fx->L_frame, st_fx->last_L_frame, st_fx->total_brate, st_fx->element_mode );
		inov_encode_ivas_fx( st_fx, st_fx->core_brate, 0, L_FRAME, st_fx->last_L_frame,
		UNVOICED, st_fx->bwidth, st_fx->sharpFlag, i_subfr, -1, p_Aq_fx,
		gain_pit_fx, cn_fx, exc_fx, h1_fx, hLPDmem->tilt_code, *pt_pitch_fx, xn_fx, code_fx, y2_fx, &unbits_PI, L_SUBFR, shift, Q_new );
		gain_pit_fx, cn_fx, exc_fx, h2_fx, hLPDmem->tilt_code, *pt_pitch_fx, xn_fx, code_fx, y2_fx, &unbits_PI, L_SUBFR, shift, Q_new );

		E_ACELP_xy2_corr( xn_fx, y1, y2_fx, &g_corr, L_SUBFR, Q_xn );

lib_enc/ivas_cpe_enc.c

+7 −4

Original line number	Diff line number	Diff line
		@@ -678,8 +678,11 @@ ivas_error ivas_cpe_enc_fx(
		#ifdef IVAS_FLOAT_FIXED
		#ifdef IVAS_FLOAT_FIXED_CONVERSIONS
		Word16 input_fx[2][L_FRAME48k];
		floatToFixed_arr16( sts[1]->input, input_fx[0], 0, input_frame );
		floatToFixed_arr16( sts[0]->input, input_fx[1], 0, input_frame );
		Word16 tmpppp;
		tmpppp = s_min( Q_factor_arr( sts[1]->input, input_frame ), Q_factor_arr( sts[0]->input, input_frame ) );

		floatToFixed_arr16( sts[1]->input, input_fx[1], tmpppp, input_frame );
		floatToFixed_arr16( sts[0]->input, input_fx[0], tmpppp, input_frame );
		#endif
		Word16 tdm_SM_flag;
		IF( hCPE->hStereoTD->tdm_LRTD_flag == 0 )
		@@ -695,8 +698,8 @@ ivas_error ivas_cpe_enc_fx(
		stereo_tdm_downmix_ivas_fx( hCPE->hStereoTD, input_fx[0], input_fx[1], input_frame, tdm_ratio_idx, tdm_SM_flag, tdm_ratio_idx_SM );

		#ifdef IVAS_FLOAT_FIXED_CONVERSIONS
		fixedToFloat_arr( input_fx[0], sts[0]->input, 0, input_frame );
		fixedToFloat_arr( input_fx[1], sts[1]->input, 0, input_frame );
		fixedToFloat_arr( input_fx[0], sts[0]->input, tmpppp, input_frame );
		fixedToFloat_arr( input_fx[1], sts[1]->input, tmpppp, input_frame );
		#endif
		#else
		stereo_tdm_downmix( hCPE->hStereoTD, sts[0]->input, sts[1]->input, input_frame, tdm_ratio_idx, ( ( hCPE->hStereoTD->tdm_LRTD_flag == 0 ) ? tdm_SM_or_LRTD_Pri : 0 ), tdm_ratio_idx_SM );

lib_enc/ivas_decision_matrix_enc.c

+26 −1

Original line number	Diff line number	Diff line
		@@ -498,10 +498,24 @@ void ivas_decision_matrix_enc_fx(
		IF( EQ_16( st->core, TCX_20_CORE ) && LT_32( st->total_brate, STEREO_TCX_MIN_RATE ) )
		{
		st->core = ACELP_CORE;

		#ifdef NONBE_FIX_1205_TD_STEREO_MOD_CT
		test();
		test();
		test();
		/* In TD stereo below 24.4 kbps we cannot overwrite the `coder_type` when it is set to TRANSITION, */
		/* as it is used for TD stereo bit allocation. To ensure consistent bit allocation, it must remain unchanged on the decoder side. */
		if ( st->idchan == 0 && !( LT_32( element_brate, IVAS_24k4 ) && EQ_16( st->coder_type, TRANSITION ) && EQ_16( st->element_mode, IVAS_CPE_TD ) ) )
		{
		st->coder_type = AUDIO;
		move16();
		}
		#else
		st->coder_type = AUDIO;
		move16();
		#endif
		st->sp_aud_decision2 = 0;

		move16();
		move16();
		move16();

		@@ -844,6 +858,16 @@ void ivas_signaling_enc_fx(

		IF( EQ_16( st->core, ACELP_CORE ) )
		{
		#ifdef NONBE_FIX_1205_TD_STEREO_MOD_CT
		/* write coder type */
		push_indice( hBstr, IND_ACELP_SIGNALLING, st->coder_type, 3 );

		IF( GE_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) )
		{
		/* write sharpening flag */
		push_indice( hBstr, IND_SHARP_FLAG, st->sharpFlag, 1 );
		}
		#else
		IF( LT_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) )
		{
		push_indice( hBstr, IND_ACELP_SIGNALLING, st->coder_type, 3 );
		@@ -856,6 +880,7 @@ void ivas_signaling_enc_fx(
		/* write sharpening flag */
		push_indice( hBstr, IND_SHARP_FLAG, st->sharpFlag, 1 );
		}
		#endif

		/* write extension layer flag to distinguish between TBE (0) and BWE (1) */
		IF( st->extl_brate > 0 )