From 95c647d848a904aa0ffd19d8aa0f014a974c8a34 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Thu, 21 Nov 2024 14:38:57 +0530 Subject: [PATCH] Fixed point Changes corresponding to float reference code updates made in MR 797,788 [x] Bug fix for inverse matrix computation [x] fixes from !1826 and !1862 (flt.pt.) to the ivas-float-update branch --- lib_com/options.h | 2 ++ lib_dec/ivas_stereo_mdct_core_dec_fx.c | 45 ++++++++++++++++++++++---- lib_enc/enc_uv_fx.c | 5 +-- lib_enc/ivas_cpe_enc.c | 11 ++++--- lib_enc/ivas_decision_matrix_enc.c | 27 +++++++++++++++- lib_enc/ivas_stereo_classifier.c | 16 ++++++--- lib_enc/ivas_stereo_td_enc.c | 33 +++++++++++++++++-- 7 files changed, 118 insertions(+), 21 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 095c183e8..21e924688 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -196,6 +196,8 @@ #define FIX_953_WRONG_ENERGY_RATIO_MASA_EXT /* Nok: Fix 953 wrong energy ratio value after shift and cast to Word8 */ #define FIX_982_WRONG_DECODED_ENERGY_RATIO /* Nokia: Fix 982 wrong energy in EXT mode and in second direction when present */ #define FIX_999_WRONG_ISM_EXTENDED_METADATA /* VA: fix 999: fix ISM extended metadata decoding */ +#define NONBE_FIX_1205_TD_STEREO_MOD_CT /* VA: fix mismatch of coder_type (mod_ct) btw. TD stereo encoder and decoder */ +#define NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING /* FhG: fixes for decoder-side noise level estimation in MDCT-Stereo to prevent noise bursts in stereo switching */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_dec/ivas_stereo_mdct_core_dec_fx.c b/lib_dec/ivas_stereo_mdct_core_dec_fx.c index 155e207b8..60e9c62db 100644 --- a/lib_dec/ivas_stereo_mdct_core_dec_fx.c +++ b/lib_dec/ivas_stereo_mdct_core_dec_fx.c @@ -988,7 +988,7 @@ static void run_min_stats_fx( { Word16 ch, will_estimate_noise_on_channel[CPE_CHANNELS], save_VAD[CPE_CHANNELS]; Word32 power_spec[L_FRAME16k]; - Word16 power_spec_16[L_FRAME16k], power_spec_e = 0; + Word16 power_spec_e = 0; move16(); Word32 *spec_in; Word16 spec_e; @@ -1044,29 +1044,62 @@ static void run_min_stats_fx( IF( ( EQ_16( will_estimate_noise_on_channel[0], will_estimate_noise_on_channel[1] ) ) || EQ_16( ch, 0 ) ) { Word16 tmp16 = getScaleFactor32( spec_in, L_FRAME16k ); + +#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING + Word32 power_spec_scale_fac; + + /* calculate power spectrum from MDCT coefficients and estimated MDST coeffs */ + power_spec_scale_fac = 20792; // 1.f / ( L_FRAME16k * L_FRAME16k ) in Q31 + move32(); + power_spec[0] = Mpy_32_32( W_extract_h( W_shl( W_mult_32_32( spec_in[0], spec_in[0] ), sub( tmp16, 4 ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */ + move32(); + power_spec[L_FRAME16k - 1] = Mpy_32_32( W_extract_h( W_shl( W_mult_32_32( spec_in[L_FRAME16k - 1], spec_in[L_FRAME16k - 1] ), sub( tmp16, 4 ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */ + move32(); +#else /* calculate power spectrum from MDCT coefficients and estimated MDST coeffs */ - power_spec[0] = W_extract_h( W_shl( W_mult_32_32( spec_in[0], spec_in[0] ), sub( tmp16, 4 ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */ - power_spec[L_FRAME16k - 1] = W_extract_h( W_shl( W_mult_32_32( spec_in[L_FRAME16k - 1], spec_in[L_FRAME16k - 1] ), sub( tmp16, 4 ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */ + power_spec[0] = W_extract_h( W_shl( W_mult_32_32( spec_in[0], spec_in[0] ), sub( tmp16, 4 ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */ + power_spec[L_FRAME16k - 1] = W_extract_h( W_shl( W_mult_32_32( spec_in[L_FRAME16k - 1], spec_in[L_FRAME16k - 1] ), sub( tmp16, 4 ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31 */ +#endif FOR( Word16 i = 1; i < L_FRAME16k - 1; i++ ) { Word32 mdst; - mdst = L_sub( spec_in[i + 1], spec_in[i - 1] ); /* Q31 - x_e */ + mdst = L_sub( spec_in[i + 1], spec_in[i - 1] ); /* Q31 - x_e */ + +#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING + power_spec[i] = Mpy_32_32( L_add( W_extract_h( W_shl( W_mult_32_32( spec_in[i], spec_in[i] ), sub( tmp16, 4 ) ) ), W_extract_h( W_shl( W_mult_32_32( mdst, mdst ), sub( tmp16, 4 ) ) ) ), power_spec_scale_fac ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31*/ +#else power_spec[i] = L_add( W_extract_h( W_shl( W_mult_32_32( spec_in[i], spec_in[i] ), sub( tmp16, 4 ) ) ), W_extract_h( W_shl( W_mult_32_32( mdst, mdst ), sub( tmp16, 4 ) ) ) ); /* 2 * (Q31 - x_e) + tmp16 - Q4 - Q31*/ +#endif move32(); } power_spec_e = sub( add( 4, shl( spec_e, 1 ) ), tmp16 ); } - Copy_Scale_sig32_16( power_spec, power_spec_16, L_FRAME16k, 0 ); /* exp(power_spec_e) */ - +#ifndef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING noisy_speech_detection_fx( st->hFdCngDec, st->VAD && st->m_frame_type == ACTIVE_FRAME, power_spec_16, sub( 15, power_spec_e ) ); st->hFdCngDec->hFdCngCom->likelihood_noisy_speech = add( mult_r( 32440 /* 0.99 in Q15 */, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech ), mult_r( st->hFdCngDec->hFdCngCom->flag_noisy_speech, 328 /* 0.01 in Q15 */ ) ); /* Q15 */ move16(); st->lp_noise = st->hFdCngDec->lp_noise; /* Q9.23 */ move32(); +#endif } +#ifdef NONBE_FIX_1204_MDCT_STEREO_NOISE_EST_SCALING + IF( EQ_16( st->core, TCX_20_CORE ) ) + { + Word16 x_fx_16[L_FRAME16k]; + Copy_Scale_sig32_16( x[ch][0], x_fx_16, L_FRAME16k, 0 ); /* exp(x_e) */ + + test(); + noisy_speech_detection_fx( st->hFdCngDec, save_VAD[ch] && EQ_16( st->m_frame_type, ACTIVE_FRAME ), x_fx_16, sub( Q15, x_e[ch][0] ) ); + st->hFdCngDec->hFdCngCom->likelihood_noisy_speech = add( mult_r( 32440 /* 0.99 in Q15 */, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech ), mult_r( st->hFdCngDec->hFdCngCom->flag_noisy_speech, 328 /* 0.01 in Q15 */ ) ); /* Q15 */ + move16(); + st->lp_noise = st->hFdCngDec->lp_noise; /* Q9.23 */ + move32(); + } +#endif + test(); test(); IF( will_estimate_noise_on_channel[0] || will_estimate_noise_on_channel[1] || st->bfi ) diff --git a/lib_enc/enc_uv_fx.c b/lib_enc/enc_uv_fx.c index 7708c0e1c..afa32cd3a 100644 --- a/lib_enc/enc_uv_fx.c +++ b/lib_enc/enc_uv_fx.c @@ -282,6 +282,7 @@ void encod_unvoiced_ivas_fx( { Word16 xn_fx[L_SUBFR]; /* Target vector for pitch search */ Word16 h1_fx[L_SUBFR]; /* Impulse response vector */ + Word16 h2_fx[L_SUBFR]; /* Impulse response vector */ Word16 code_fx[L_SUBFR]; /* Fixed codebook excitation */ Word16 y2_fx[L_SUBFR]; /* Filtered algebraic excitation */ Word16 *pt_pitch_fx; /* pointer to floating pitch buffer */ @@ -348,7 +349,7 @@ void encod_unvoiced_ivas_fx( find_targets_fx( speech_fx, hLPDmem->mem_syn, i_subfr, &hLPDmem->mem_w0, p_Aq_fx, res_fx, L_SUBFR, p_Aw_fx, st_fx->preemph_fac, xn_fx, cn_fx, h1_fx ); - /*Copy_Scale_sig(h1_fx, h2_fx, L_SUBFR, -2);*/ + Copy_Scale_sig( h1_fx, h2_fx, L_SUBFR, -2 ); Scale_sig( h1_fx, L_SUBFR, add( 1, shift ) ); /* set h1[] in Q14 with scaling for convolution */ /* scaling of xn[] to limit dynamic at 12 bits */ @@ -390,7 +391,7 @@ void encod_unvoiced_ivas_fx( // E_ACELP_innovative_codebook_fx( exc_fx, *pt_pitch_fx, 0, 1, gain_pit_fx, hLPDmem->tilt_code, acelp_cfg, i_subfr, p_Aq_fx, h1_fx, xn_fx, cn_fx, y1, y2_fx, (Word8) st_fx->acelp_autocorr, &prm, code_fx, shift, st_fx->L_frame, st_fx->last_L_frame, st_fx->total_brate, st_fx->element_mode ); inov_encode_ivas_fx( st_fx, st_fx->core_brate, 0, L_FRAME, st_fx->last_L_frame, UNVOICED, st_fx->bwidth, st_fx->sharpFlag, i_subfr, -1, p_Aq_fx, - gain_pit_fx, cn_fx, exc_fx, h1_fx, hLPDmem->tilt_code, *pt_pitch_fx, xn_fx, code_fx, y2_fx, &unbits_PI, L_SUBFR, shift, Q_new ); + gain_pit_fx, cn_fx, exc_fx, h2_fx, hLPDmem->tilt_code, *pt_pitch_fx, xn_fx, code_fx, y2_fx, &unbits_PI, L_SUBFR, shift, Q_new ); E_ACELP_xy2_corr( xn_fx, y1, y2_fx, &g_corr, L_SUBFR, Q_xn ); diff --git a/lib_enc/ivas_cpe_enc.c b/lib_enc/ivas_cpe_enc.c index 9f9b7ad8d..e4205115c 100644 --- a/lib_enc/ivas_cpe_enc.c +++ b/lib_enc/ivas_cpe_enc.c @@ -678,8 +678,11 @@ ivas_error ivas_cpe_enc_fx( #ifdef IVAS_FLOAT_FIXED #ifdef IVAS_FLOAT_FIXED_CONVERSIONS Word16 input_fx[2][L_FRAME48k]; - floatToFixed_arr16( sts[1]->input, input_fx[0], 0, input_frame ); - floatToFixed_arr16( sts[0]->input, input_fx[1], 0, input_frame ); + Word16 tmpppp; + tmpppp = s_min( Q_factor_arr( sts[1]->input, input_frame ), Q_factor_arr( sts[0]->input, input_frame ) ); + + floatToFixed_arr16( sts[1]->input, input_fx[1], tmpppp, input_frame ); + floatToFixed_arr16( sts[0]->input, input_fx[0], tmpppp, input_frame ); #endif Word16 tdm_SM_flag; IF( hCPE->hStereoTD->tdm_LRTD_flag == 0 ) @@ -695,8 +698,8 @@ ivas_error ivas_cpe_enc_fx( stereo_tdm_downmix_ivas_fx( hCPE->hStereoTD, input_fx[0], input_fx[1], input_frame, tdm_ratio_idx, tdm_SM_flag, tdm_ratio_idx_SM ); #ifdef IVAS_FLOAT_FIXED_CONVERSIONS - fixedToFloat_arr( input_fx[0], sts[0]->input, 0, input_frame ); - fixedToFloat_arr( input_fx[1], sts[1]->input, 0, input_frame ); + fixedToFloat_arr( input_fx[0], sts[0]->input, tmpppp, input_frame ); + fixedToFloat_arr( input_fx[1], sts[1]->input, tmpppp, input_frame ); #endif #else stereo_tdm_downmix( hCPE->hStereoTD, sts[0]->input, sts[1]->input, input_frame, tdm_ratio_idx, ( ( hCPE->hStereoTD->tdm_LRTD_flag == 0 ) ? tdm_SM_or_LRTD_Pri : 0 ), tdm_ratio_idx_SM ); diff --git a/lib_enc/ivas_decision_matrix_enc.c b/lib_enc/ivas_decision_matrix_enc.c index b3ae38f10..3f9ab4f9b 100644 --- a/lib_enc/ivas_decision_matrix_enc.c +++ b/lib_enc/ivas_decision_matrix_enc.c @@ -498,10 +498,24 @@ void ivas_decision_matrix_enc_fx( IF( EQ_16( st->core, TCX_20_CORE ) && LT_32( st->total_brate, STEREO_TCX_MIN_RATE ) ) { st->core = ACELP_CORE; + +#ifdef NONBE_FIX_1205_TD_STEREO_MOD_CT + test(); + test(); + test(); + /* In TD stereo below 24.4 kbps we cannot overwrite the `coder_type` when it is set to TRANSITION, */ + /* as it is used for TD stereo bit allocation. To ensure consistent bit allocation, it must remain unchanged on the decoder side. */ + if ( st->idchan == 0 && !( LT_32( element_brate, IVAS_24k4 ) && EQ_16( st->coder_type, TRANSITION ) && EQ_16( st->element_mode, IVAS_CPE_TD ) ) ) + { + st->coder_type = AUDIO; + move16(); + } +#else st->coder_type = AUDIO; + move16(); +#endif st->sp_aud_decision2 = 0; - move16(); move16(); move16(); @@ -844,6 +858,16 @@ void ivas_signaling_enc_fx( IF( EQ_16( st->core, ACELP_CORE ) ) { +#ifdef NONBE_FIX_1205_TD_STEREO_MOD_CT + /* write coder type */ + push_indice( hBstr, IND_ACELP_SIGNALLING, st->coder_type, 3 ); + + IF( GE_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) ) + { + /* write sharpening flag */ + push_indice( hBstr, IND_SHARP_FLAG, st->sharpFlag, 1 ); + } +#else IF( LT_32( element_brate, FRMT_SHP_MIN_BRATE_IVAS ) ) { push_indice( hBstr, IND_ACELP_SIGNALLING, st->coder_type, 3 ); @@ -856,6 +880,7 @@ void ivas_signaling_enc_fx( /* write sharpening flag */ push_indice( hBstr, IND_SHARP_FLAG, st->sharpFlag, 1 ); } +#endif /* write extension layer flag to distinguish between TBE (0) and BWE (1) */ IF( st->extl_brate > 0 ) diff --git a/lib_enc/ivas_stereo_classifier.c b/lib_enc/ivas_stereo_classifier.c index b85e182f3..d78fd7433 100644 --- a/lib_enc/ivas_stereo_classifier.c +++ b/lib_enc/ivas_stereo_classifier.c @@ -1463,15 +1463,21 @@ void unclr_classifier_dft_fx( /* normalize score to -1:+1 */ - /*if (score > UNCLR_SCORE_THR) + IF( BASOP_Util_Add_Mant32Exp( score, score_e, -UNCLR_SCORE_THR_Q28, 3, &i ) > 0 ) { - score = UNCLR_SCORE_THR; + score = UNCLR_SCORE_THR_Q28; + move32(); + score_e = 3; + move16(); } - else if (score < -UNCLR_SCORE_THR) + ELSE IF( BASOP_Util_Add_Mant32Exp( score, score_e, UNCLR_SCORE_THR_Q28, 3, &i ) < 0 ) { - score = -UNCLR_SCORE_THR; + score = -UNCLR_SCORE_THR_Q28; + move32(); + score_e = 3; + move16(); } - score /= 2 * UNCLR_SCORE_THR;*/ + score = L_shr_r_sat( score, sub( 3, score_e ) ); // Q31 /* weight raw score with relative energy */ diff --git a/lib_enc/ivas_stereo_td_enc.c b/lib_enc/ivas_stereo_td_enc.c index 5c0b6a6f7..79181844d 100644 --- a/lib_enc/ivas_stereo_td_enc.c +++ b/lib_enc/ivas_stereo_td_enc.c @@ -832,6 +832,32 @@ void tdm_configure_enc_fx( mod_ct = AUDIO; move16(); +#ifdef NONBE_FIX_1205_TD_STEREO_MOD_CT + IF( LT_32( hCPE->element_brate, IVAS_24k4 ) ) + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + /* In TD stereo, the TRANSITION mode has a specific bit allocation. All other formats share the same bit allocation. For these other formats, `mod_ct` is set to AUDIO to aid in debugging, though it does not have any functional impact. */ + if ( !( sts[0]->localVAD == 0 && EQ_16( sts[0]->coder_type, TRANSITION ) ) && + ( EQ_16( sts[0]->coder_type, TRANSITION ) || + ( ( ( GE_16( sts[0]->last_L_frame, L_FRAME16k ) && sts[0]->flag_ACELP16k == 0 ) || + ( EQ_16( sts[0]->last_L_frame, L_FRAME ) && EQ_16( sts[0]->flag_ACELP16k, 1 ) ) ) && + ( sts[0]->last_core_brate != FRAME_NO_DATA ) && + NE_32( sts[0]->last_core_brate, SID_2k40 ) && + NE_16( sts[0]->coder_type_raw, VOICED ) ) ) ) + { + mod_ct = TRANSITION; + move16(); + } + } +#else IF( LT_32( hCPE->element_brate, IVAS_24k4 ) ) { mod_ct = sts[0]->coder_type; @@ -857,6 +883,7 @@ void tdm_configure_enc_fx( move16(); } } +#endif /* Correction of tdm_inst_ratio_idx in case of TC in the seecondary channel */ test(); @@ -1467,9 +1494,9 @@ static void tdm_downmix_fade_ivas_fx( FOR( i = start_index; i < end_index; i++ ) { - FR_Y_fx[i] = add( mult( add( mult( Right_in_fx[i], extract_l( One_m_OldRatio_fx ) ), mult( Left_in_fx[i], extract_l( OldRatio_L_fx ) ) ), fade_out_fx ), mult( add( mult( Right_in_fx[i], extract_l( One_m_Ratio_fx ) ), mult( Left_in_fx[i], extract_l( ratio_L_fx ) ) ), fade_in_fx ) ); // Qx - LR_X_fx[i] = add( mult( sub( mult( Left_in_fx[i], extract_l( One_m_OldRatio_fx ) ), mult( Right_in_fx[i], extract_l( OldRatio_L_fx ) ) ), fade_out_fx ), - mult( sub( mult( Left_in_fx[i], extract_l( One_m_Ratio_fx ) ), mult( Right_in_fx[i], extract_l( ratio_L_fx ) ) ), + FR_Y_fx[i] = add( mult( add( mult( Right_in_fx[i], extract_h( One_m_OldRatio_fx ) ), mult( Left_in_fx[i], extract_h( OldRatio_L_fx ) ) ), fade_out_fx ), mult( add( mult( Right_in_fx[i], extract_h( One_m_Ratio_fx ) ), mult( Left_in_fx[i], extract_h( ratio_L_fx ) ) ), fade_in_fx ) ); // Qx + LR_X_fx[i] = add( mult( sub( mult( Left_in_fx[i], extract_h( One_m_OldRatio_fx ) ), mult( Right_in_fx[i], extract_h( OldRatio_L_fx ) ) ), fade_out_fx ), + mult( sub( mult( Left_in_fx[i], extract_h( One_m_Ratio_fx ) ), mult( Right_in_fx[i], extract_h( ratio_L_fx ) ) ), fade_in_fx ) ); // Qx move16(); move16(); -- GitLab