From 7993a4ff2e7e64edd2df41ed86012e555ff9ebf8 Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Thu, 20 Mar 2025 08:00:15 -0400 Subject: [PATCH 1/3] possible small complexity reduction and precision improvement --- lib_com/options.h | 3 +++ lib_enc/fd_cng_enc_fx.c | 14 +++++++++++++- lib_enc/ivas_ism_param_enc_fx.c | 8 ++++++++ lib_enc/ivas_omasa_enc_fx.c | 4 ++++ lib_enc/ivas_stereo_td_enc_fx.c | 16 ++++++++++++++++ lib_enc/ivas_td_low_rate_enc_fx.c | 4 ++++ lib_enc/subband_fft_fx.c | 7 +++++++ lib_enc/swb_tbe_enc_fx.c | 8 ++++++++ .../ivas_dirac_dec_binaural_functions_fx.c | 18 ++++++++++++++++-- 9 files changed, 79 insertions(+), 3 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index c9207fb3c..841eb6a2b 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -175,4 +175,7 @@ #define NONBE_FIX_1402_WAVEADJUST /* VA: BASOP iisue 1402: fix waveform adjustment decoder PLC */ #define FIX_ISSUE_1376 /* VA: Fix for issue 1376 (issue with GSC excitation) */ #define OPT_SBA_AVOID_SPAR_RESCALE /* Optimization made to spar decoder and IGF */ + +#define IMPROV_PRECISION /* Places where precision can be improved at no extra cost */ +#define IMPROV_PRECISION_EVSBE /* Places where precision can be improved at no extra cost; may affect EVS BE (bit-exactness) */ #endif diff --git a/lib_enc/fd_cng_enc_fx.c b/lib_enc/fd_cng_enc_fx.c index 9c3a236d7..c690b6a76 100644 --- a/lib_enc/fd_cng_enc_fx.c +++ b/lib_enc/fd_cng_enc_fx.c @@ -2732,16 +2732,28 @@ void stereoFdCngCoherence_fx( move16(); move16(); +#ifdef IMPROV_PRECISION_EVSBE + cr = BASOP_Util_Add_Mant32Exp( cr, cr_exp, L_mac( L_mult( pt_fftL[0], pt_fftR[0] ), pt_fftL[L_FFT / 2], pt_fftR[L_FFT / 2] ), shl( fft_exp, 1 ), &cr_exp ); /* exp(cr_exp) */ + eL = BASOP_Util_Add_Mant32Exp( eL, eL_exp, L_mac( L_mult( pt_fftL[0], pt_fftL[0] ), pt_fftL[L_FFT / 2], pt_fftL[L_FFT / 2] ), shl( fft_exp, 1 ), &eL_exp ); /* exp(eL_exp) */ + eR = BASOP_Util_Add_Mant32Exp( eR, eR_exp, L_mac( L_mult( pt_fftR[0], pt_fftR[0] ), pt_fftR[L_FFT / 2], 
pt_fftR[L_FFT / 2] ), shl( fft_exp, 1 ), &eR_exp ); /* exp(eR_exp) */ +#else cr = BASOP_Util_Add_Mant32Exp( cr, cr_exp, L_add( L_mult( pt_fftL[0], pt_fftR[0] ), L_mult( pt_fftL[L_FFT / 2], pt_fftR[L_FFT / 2] ) ), shl( fft_exp, 1 ), &cr_exp ); /* exp(cr_exp) */ eL = BASOP_Util_Add_Mant32Exp( eL, eL_exp, L_add( L_mult( pt_fftL[0], pt_fftL[0] ), L_mult( pt_fftL[L_FFT / 2], pt_fftL[L_FFT / 2] ) ), shl( fft_exp, 1 ), &eL_exp ); /* exp(eL_exp) */ eR = BASOP_Util_Add_Mant32Exp( eR, eR_exp, L_add( L_mult( pt_fftR[0], pt_fftR[0] ), L_mult( pt_fftR[L_FFT / 2], pt_fftR[L_FFT / 2] ) ), shl( fft_exp, 1 ), &eR_exp ); /* exp(eR_exp) */ - +#endif FOR( i = 1; i < L_FFT / 2; i++ ) { +#ifdef IMPROV_PRECISION_EVSBE + cr = BASOP_Util_Add_Mant32Exp( cr, cr_exp, L_mac( L_mult( pt_fftL[i], pt_fftR[i] ), pt_fftL[L_FFT - i], pt_fftR[L_FFT - i] ), shl( fft_exp, 1 ), &cr_exp ); /* exp(cr_exp) */ + ci = BASOP_Util_Add_Mant32Exp( ci, ci_exp, L_mac( L_mult( -pt_fftL[i], pt_fftR[L_FFT - i] ), pt_fftL[L_FFT - i], pt_fftR[i] ), shl( fft_exp, 1 ), &ci_exp ); /* exp(ci_exp) */ + eL = BASOP_Util_Add_Mant32Exp( eL, eL_exp, L_mac( L_mult( pt_fftL[i], pt_fftL[i] ), pt_fftL[L_FFT - i], pt_fftL[L_FFT - i] ), shl( fft_exp, 1 ), &eL_exp ); /* exp(eL_exp) */ + eR = BASOP_Util_Add_Mant32Exp( eR, eR_exp, L_mac( L_mult( pt_fftR[i], pt_fftR[i] ), pt_fftR[L_FFT - i], pt_fftR[L_FFT - i] ), shl( fft_exp, 1 ), &eR_exp ); /* exp(eR_exp) */ +#else cr = BASOP_Util_Add_Mant32Exp( cr, cr_exp, L_add( L_mult( pt_fftL[i], pt_fftR[i] ), L_mult( pt_fftL[L_FFT - i], pt_fftR[L_FFT - i] ) ), shl( fft_exp, 1 ), &cr_exp ); /* exp(cr_exp) */ ci = BASOP_Util_Add_Mant32Exp( ci, ci_exp, L_add( L_mult( -pt_fftL[i], pt_fftR[L_FFT - i] ), L_mult( pt_fftL[L_FFT - i], pt_fftR[i] ) ), shl( fft_exp, 1 ), &ci_exp ); /* exp(ci_exp) */ eL = BASOP_Util_Add_Mant32Exp( eL, eL_exp, L_add( L_mult( pt_fftL[i], pt_fftL[i] ), L_mult( pt_fftL[L_FFT - i], pt_fftL[L_FFT - i] ) ), shl( fft_exp, 1 ), &eL_exp ); /* exp(eL_exp) */ eR = 
BASOP_Util_Add_Mant32Exp( eR, eR_exp, L_add( L_mult( pt_fftR[i], pt_fftR[i] ), L_mult( pt_fftR[L_FFT - i], pt_fftR[L_FFT - i] ) ), shl( fft_exp, 1 ), &eR_exp ); /* exp(eR_exp) */ +#endif } test(); test(); diff --git a/lib_enc/ivas_ism_param_enc_fx.c b/lib_enc/ivas_ism_param_enc_fx.c index cda8cffd6..9d9b27786 100644 --- a/lib_enc/ivas_ism_param_enc_fx.c +++ b/lib_enc/ivas_ism_param_enc_fx.c @@ -295,7 +295,11 @@ void ivas_param_ism_stereo_dmx_fx( { tmp = L_add( tmp, 2 * EVS_PI_FX ); } +#ifdef IMPROV_PRECISION + cardioid_left[i] = mac_r( L_mult( alpha, 16384 ), sub( ONE_IN_Q15 - 1, alpha ), getCosWord16( extract_l( tmp ) ) ); // Q14 +#else cardioid_left[i] = add( shr( alpha, 1 ), mult( sub( ONE_IN_Q15 - 1, alpha ), getCosWord16( extract_l( tmp ) ) ) ); // Q14 +#endif move16(); IF( st_ivas->hSCE[0]->hCoreCoder[0]->ini_frame > 0 ) @@ -303,7 +307,11 @@ void ivas_param_ism_stereo_dmx_fx( Word16 last_cardioid_right; last_cardioid_right = sub( ONE_IN_Q14 /* 1.0f in Q14 */, last_cardioid_left ); /* Smoothing */ +#ifdef IMPROV_PRECISION + cardioid_left[i] = mac_r( L_mult( 24576 /* 0.75f in Q15 */, cardioid_left[i] ), 8192 /* 0.25f in Q15 */, last_cardioid_left ) ; // Q14 +#else cardioid_left[i] = add( mult( 24576 /* 0.75f in Q15 */, cardioid_left[i] ), mult( 8192 /* 0.25f in Q15 */, last_cardioid_left ) ); // Q14 +#endif move16(); Word32 grad_32 = L_mult( sub( cardioid_left[i], last_cardioid_left ), shl( one_by_input_frame, 1 ) /* 2.0f / (float) input_frame*/ ); /* Q14+Q16 = Q30 */ /* for the right cardioid, multiply with -1 */ /* Cardioids sum up to 1 */ diff --git a/lib_enc/ivas_omasa_enc_fx.c b/lib_enc/ivas_omasa_enc_fx.c index d31e6d53d..855a505d9 100644 --- a/lib_enc/ivas_omasa_enc_fx.c +++ b/lib_enc/ivas_omasa_enc_fx.c @@ -1476,7 +1476,11 @@ static void ivas_omasa_dmx_fx( g1 = interpolator[k]; move16(); g2 = sub( MAX_WORD16, g1 ); /*q15*/ +#ifdef IMPROV_PRECISION_EVSBE + data_out[j][k] = L_add( data_out[j][k], Mpy_32_32( L_mac( L_mult( g1, gains[j] ), g2, 
prev_gains[i][j] ) /*q31*/, data_in[i][k] ) ); /*Qx*/ +#else data_out[j][k] = L_add( data_out[j][k], Mpy_32_32( L_add( L_mult( g1, gains[j] ), L_mult( g2, prev_gains[i][j] ) ) /*q31*/, data_in[i][k] ) ); /*Qx*/ +#endif move32(); } } diff --git a/lib_enc/ivas_stereo_td_enc_fx.c b/lib_enc/ivas_stereo_td_enc_fx.c index 378068827..1618843f0 100644 --- a/lib_enc/ivas_stereo_td_enc_fx.c +++ b/lib_enc/ivas_stereo_td_enc_fx.c @@ -400,9 +400,15 @@ void tdm_configure_enc_fx( hStereoTD->tdm_use_IAWB_Ave_lpc = 0; /* Flag initialisation */ move16(); +#ifdef IMPROV_PRECISION + sts[0]->hSpMusClas->tdm_lt_Etot_fx = mac_r( L_mult( 3277 /*0.1f in Q15*/, Etot_last_fx[0] ), 29491 /* 0.9f*/, sts[0]->hSpMusClas->tdm_lt_Etot_fx ); + move16(); + sts[1]->hSpMusClas->tdm_lt_Etot_fx = mac_r( L_mult( 3277 /*0.1f in Q15*/, Etot_last_fx[1] ), 29491 /* 0.9f*/, sts[1]->hSpMusClas->tdm_lt_Etot_fx ); +#else sts[0]->hSpMusClas->tdm_lt_Etot_fx = add( mult( 3277 /*0.1f in Q15*/, Etot_last_fx[0] ), mult( 29491 /* 0.9f*/, sts[0]->hSpMusClas->tdm_lt_Etot_fx ) ); move16(); sts[1]->hSpMusClas->tdm_lt_Etot_fx = add( mult( 3277 /*0.1f in Q15*/, Etot_last_fx[1] ), mult( 29491 /* 0.9f*/, sts[1]->hSpMusClas->tdm_lt_Etot_fx ) ); +#endif move16(); test(); @@ -799,11 +805,21 @@ static void tdm_downmix_plain_ivas_fx( ) { Word16 i; +#ifdef IMPROV_PRECISION + Word16 One_m_Ratio_fx16, ratio_L_fx16; + One_m_Ratio_fx16 = extract_h( One_m_Ratio_fx ); + ratio_L_fx16 = extract_h( ratio_L_fx ); +#endif FOR( i = start_index; i < end_index; i++ ) { +#ifdef IMPROV_PRECISION + FR_Y_fx[i] = mac_r( L_mult( Right_in_fx[i], One_m_Ratio_fx16 ), Left_in_fx[i], ratio_L_fx16 ); + LR_X_fx[i] = msu_r( L_mult( Left_in_fx[i], One_m_Ratio_fx16 ), Right_in_fx[i], ratio_L_fx16 ); +#else FR_Y_fx[i] = add( mult( Right_in_fx[i], extract_h( One_m_Ratio_fx ) ), mult( Left_in_fx[i], extract_h( ratio_L_fx ) ) ); LR_X_fx[i] = sub( mult( Left_in_fx[i], extract_h( One_m_Ratio_fx ) ), mult( Right_in_fx[i], extract_h( ratio_L_fx ) ) ); +#endif move16(); 
move16(); } diff --git a/lib_enc/ivas_td_low_rate_enc_fx.c b/lib_enc/ivas_td_low_rate_enc_fx.c index c959ef758..29dd767bb 100644 --- a/lib_enc/ivas_td_low_rate_enc_fx.c +++ b/lib_enc/ivas_td_low_rate_enc_fx.c @@ -314,7 +314,11 @@ void encod_gen_2sbfr( lp_filt_exc_enc_ivas_fx( MODE1, coder_type, i_subfr, exc, h1, xn, y1, xn2, 2 * L_SUBFR, L_frame, g_corr, clip_gain, &gain_pit, &st->acelp_cfg.ltf_mode ); #endif /* update long-term pitch gain for speech/music classifier */ +#ifdef IMPROV_PRECISION + st->hSpMusClas->lowrate_pitchGain = mac_r( L_mult( 29491, st->hSpMusClas->lowrate_pitchGain ), 3277 /*Q15*/, gain_pit ); // Q14 +#else st->hSpMusClas->lowrate_pitchGain = add( mult( 29491, st->hSpMusClas->lowrate_pitchGain ), mult( 3277 /*Q15*/, gain_pit ) ); // Q14 +#endif move16(); /*-----------------------------------------------------------------* diff --git a/lib_enc/subband_fft_fx.c b/lib_enc/subband_fft_fx.c index 10de09fe9..8a6ee47ab 100644 --- a/lib_enc/subband_fft_fx.c +++ b/lib_enc/subband_fft_fx.c @@ -35,10 +35,17 @@ static void ComplexMult_16( const Word16 c1, const Word16 c2 ) { +#ifdef IMPROV_PRECISION_EVSBE + *y1 = mac_r( L_mult( x1, c1 ), x2, c2 ); + move16(); + *y2 = msu_r( L_mult( x2, c1 ), x1, c2 ); + move16(); +#else *y1 = add( mult( x1, c1 ), mult( x2, c2 ) ); move16(); *y2 = sub( mult( x2, c1 ), mult( x1, c2 ) ); move16(); +#endif } /*-------------------------------------------------------------------* * ffr_getSfWord32() diff --git a/lib_enc/swb_tbe_enc_fx.c b/lib_enc/swb_tbe_enc_fx.c index ba8ee5aa8..6cde2c5ea 100644 --- a/lib_enc/swb_tbe_enc_fx.c +++ b/lib_enc/swb_tbe_enc_fx.c @@ -4002,7 +4002,11 @@ void swb_tbe_enc_ivas_fx( FOR( i = 0; i < NUM_SHB_SUBGAINS; i++ ) { // GainShape[i] = ( 1 - feedback ) * GainShape[i] + feedback * GainShape_Interp[i]; +#ifdef IMPROV_PRECISION_EVSBE + GainShape_fx[i] = extract_h( L_mac( L_mult( sub( MAX16B, feedback ), GainShape_fx[i] ), feedback, GainShape_Interp_fx[i] ) ); +#else GainShape_fx[i] = extract_h( L_add( 
L_mult( sub( MAX16B, feedback ), GainShape_fx[i] ), L_mult( feedback, GainShape_Interp_fx[i] ) ) ); +#endif move16(); } } @@ -4118,7 +4122,11 @@ void swb_tbe_enc_ivas_fx( FOR( i = 0; i < NUM_SHB_SUBGAINS; i++ ) { // GainShape[i] = ( 1 - feedback ) * GainShape[i * NUM_SHB_SUBGAINS] + feedback * GainShape_Interp[i]; +#ifdef IMPROV_PRECISION_EVSBE + GainShape_fx[i] = extract_h( L_mac( L_mult( sub( MAX16B, feedback ), GainShape_fx[i * NUM_SHB_SUBGAINS] ), feedback, GainShape_Interp_fx[i] ) ); // Q15 +#else GainShape_fx[i] = extract_h( L_add( L_mult( sub( MAX16B, feedback ), GainShape_fx[i * NUM_SHB_SUBGAINS] ), L_mult( feedback, GainShape_Interp_fx[i] ) ) ); // Q15 +#endif move16(); } diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index b5dd1f8b9..70c52a210 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -2551,15 +2551,21 @@ static void ivas_dirac_dec_binaural_process_output_fx( { Word16 gain; /* Mixing using the formulated processing matrix M */ +#ifdef IMPROV_PRECISION + gain = mac_r( L_mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxRePrev_fx[chA][chB][bin] ), interpVal_fx, hDiracDecBin->processMtxRe_fx[chA][chB][bin] ); // Q11 +#else gain = add( mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxRePrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxRe_fx[chA][chB][bin] ) ); // Q11 - +#endif outSlotRe_fx[bin] = Madd_32_16( outSlotRe_fx[bin], inRe_fx[chB][slot][bin], gain ); // q_inp_mix-4//q_result outSlotIm_fx[bin] = Madd_32_16( outSlotIm_fx[bin], inIm_fx[chB][slot][bin], gain ); // q_inp_mix-4//q_result move32(); move32(); +#ifdef IMPROV_PRECISION_EVSBE + gain = mac_r( L_mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxImPrev_fx[chA][chB][bin] ), interpVal_fx, hDiracDecBin->processMtxIm_fx[chA][chB][bin] ); // Q11 +#else gain = add( mult( sub( 32767, interpVal_fx ), 
hDiracDecBin->processMtxImPrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxIm_fx[chA][chB][bin] ) ); // Q11 - +#endif // interpVal * hDiracDecBin->processMtxIm[chA][chB][bin]; outSlotRe_fx[bin] = Msub_32_16( outSlotRe_fx[bin], inIm_fx[chB][slot][bin], gain ); // q_inp_mix-4//q_result outSlotIm_fx[bin] = Madd_32_16( outSlotIm_fx[bin], inRe_fx[chB][slot][bin], gain ); // q_inp_mix-4//q_result @@ -2571,7 +2577,11 @@ static void ivas_dirac_dec_binaural_process_output_fx( test(); IF( LT_16( bin, max_band_decorr ) && LT_16( chB, 2 ) ) { +#ifdef IMPROV_PRECISION_EVSBE + gain = mac_r( L_mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxDecRePrev_fx[chA][chB][bin] ), interpVal_fx, hDiracDecBin->processMtxDecRe_fx[chA][chB][bin] ); // Q11 +#else gain = add( mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxDecRePrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxDecRe_fx[chA][chB][bin] ) ); // Q11 +#endif // interpVal * hDiracDecBin->processMtxDecRe[chA][chB][bin]; outSlotRe_fx[bin] = Madd_32_16( outSlotRe_fx[bin], decSlotRePointer_fx[bin], gain ); // q_inp_mix-4//q_result outSlotIm_fx[bin] = Madd_32_16( outSlotIm_fx[bin], decSlotImPointer_fx[bin], gain ); // q_inp_mix-4//q_result @@ -2579,7 +2589,11 @@ static void ivas_dirac_dec_binaural_process_output_fx( move32(); +#ifdef IMPROV_PRECISION_EVSBE + gain = mac_r( L_mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxDecImPrev_fx[chA][chB][bin] ), interpVal_fx, hDiracDecBin->processMtxDecIm_fx[chA][chB][bin] ); // Q11 +#else gain = add( mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxDecImPrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxDecIm_fx[chA][chB][bin] ) ); // Q11 +#endif outSlotRe_fx[bin] = Msub_32_16( outSlotRe_fx[bin], decSlotImPointer_fx[bin], gain ); // q_inp_mix-4//q_result outSlotIm_fx[bin] = Madd_32_16( outSlotIm_fx[bin], decSlotRePointer_fx[bin], gain ); // q_inp_mix-4//q_result move32(); -- GitLab From 
284ba9485c53e223825e14de5c5b88984c619bcc Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Thu, 20 Mar 2025 08:30:51 -0400 Subject: [PATCH 2/3] more possible slight complexity reduction --- lib_com/ivas_pca_tools_fx.c | 9 ++++++++- lib_dec/bass_psfilter_fx.c | 4 ++++ lib_dec/ivas_sns_dec_fx.c | 8 ++++++++ lib_dec/ivas_stereo_cng_dec.c | 28 +++++++++++++++++++++++++++- lib_dec/ivas_stereo_dft_dec_fx.c | 5 ++++- lib_dec/ivas_stereo_ica_dec_fx.c | 4 ++++ 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/lib_com/ivas_pca_tools_fx.c b/lib_com/ivas_pca_tools_fx.c index c84078a7d..ea4cfce7e 100644 --- a/lib_com/ivas_pca_tools_fx.c +++ b/lib_com/ivas_pca_tools_fx.c @@ -1377,7 +1377,11 @@ void pca_enc_s3_fx( q_ang_2surv_fx( ph1_fx, n1, ph1_q_fx, ind1 ); +#ifdef IMPROV_PRECISION + tmp = mac_r( L_mac( L_mult( q_fx[1], q_fx[1] ), q_fx[2], q_fx[2] ), q_fx[3], q_fx[3] ); // Q15 + Q15 - Q15 -> Q15 +#else tmp = add( add( mult( q_fx[1], q_fx[1] ), mult( q_fx[2], q_fx[2] ) ), mult( q_fx[3], q_fx[3] ) ); // Q15 + Q15 - Q15 -> Q15 +#endif r_e = 0; move16(); r_fx = Sqrt16( tmp, &r_e ); @@ -1431,8 +1435,11 @@ void pca_enc_s3_fx( q_ang_2surv_fx( ph2_fx, n2[i], ph2_q_fx + 2 * i, ind2 + 2 * i ); } +#ifdef IMPROV_PRECISION + r_fx = Sqrt16( mac_r( L_mult( q_fx[2], q_fx[2] ), q_fx[3], q_fx[3] ), &r_e ); +#else r_fx = Sqrt16( add( mult( q_fx[2], q_fx[2] ), mult( q_fx[3], q_fx[3] ) ), &r_e ); - +#endif v_fx = BASOP_Util_Divide1616_Scale( q_fx[2], r_fx, &v_e ); v_e = add( v_e, sub( 0, r_e ) ); diff --git a/lib_dec/bass_psfilter_fx.c b/lib_dec/bass_psfilter_fx.c index 45cf7feed..14f8008ff 100644 --- a/lib_dec/bass_psfilter_fx.c +++ b/lib_dec/bass_psfilter_fx.c @@ -1041,7 +1041,11 @@ Word16 res_bpf_adapt_ivas_fx( bpf_error_ratio = ONE_IN_Q14; // Q13 move16(); } +#ifdef IMPROV_PRECISION + bpf_error_ratio = mac_r( L_mult( STEREO_DFT_BPF_ADAPT_BETA_FX, bpf_error_ratio ), ( MAX_16 - STEREO_DFT_BPF_ADAPT_BETA_FX ), hStereoDft->bpf_error_ratio_mem_fx ); +#else bpf_error_ratio = add( 
mult( STEREO_DFT_BPF_ADAPT_BETA_FX, bpf_error_ratio ), mult( ( MAX_16 - STEREO_DFT_BPF_ADAPT_BETA_FX ), hStereoDft->bpf_error_ratio_mem_fx ) ); +#endif hStereoDft->bpf_error_ratio_mem_fx = bpf_error_ratio; move16(); diff --git a/lib_dec/ivas_sns_dec_fx.c b/lib_dec/ivas_sns_dec_fx.c index 3bc6d337e..327116024 100644 --- a/lib_dec/ivas_sns_dec_fx.c +++ b/lib_dec/ivas_sns_dec_fx.c @@ -83,7 +83,11 @@ static void sns_1st_dec_fx( FOR( i = 0; i < M / 2; i++ ) { +#ifdef IMPROV_PRECISION + snsq_fx[i] = L_mac( L_mult( ( *p_dico++ ), cdbk_fix ), means[i], means_fix ); // Q16 +#else snsq_fx[i] = L_add( L_mult( ( *p_dico++ ), cdbk_fix ), L_mult( means[i], means_fix ) ); // Q16 +#endif move32(); } @@ -91,7 +95,11 @@ static void sns_1st_dec_fx( FOR( i = M / 2; i < M; i++ ) { +#ifdef IMPROV_PRECISION + snsq_fx[i] = L_mac( L_mult( ( *p_dico++ ), cdbk_fix ), means[i], means_fix ); /*Q16*/ +#else snsq_fx[i] = L_add( L_mult( ( *p_dico++ ), cdbk_fix ), L_mult( means[i], means_fix ) ); /*Q16*/ +#endif move32(); } diff --git a/lib_dec/ivas_stereo_cng_dec.c b/lib_dec/ivas_stereo_cng_dec.c index 223860db0..1994f4875 100644 --- a/lib_dec/ivas_stereo_cng_dec.c +++ b/lib_dec/ivas_stereo_cng_dec.c @@ -121,7 +121,9 @@ static void stereo_dft_generate_comfort_noise_fx( Word32 tmp32_1, tmp32_2; Word16 q_div, q_sqrt1, q_sqrt2, q_sqrt, sqrt_res; Word16 q_shift, q_shift_1, q_shift_2, min_q; - +#ifdef IMPROV_PRECISION + Word16 tmp16, tmp_p, tmp_s; +#endif hFdCngCom = st->hFdCngDec->hFdCngCom; push_wmops( "DFT_CNG" ); @@ -163,7 +165,11 @@ static void stereo_dft_generate_comfort_noise_fx( } ELSE { +#ifdef IMPROV_PRECISION + hStereoDft->g_state_fx[b] = mac_r( L_mult( ONE_MINUS_A_GFILT_FX, extract_h( *pSideGain++ ) ), A_GFILT_FX, hStereoDft->g_state_fx[b] ); /* Q15 */ +#else hStereoDft->g_state_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, extract_h( *pSideGain++ ) ), mult( A_GFILT_FX, hStereoDft->g_state_fx[b] ) ); /* Q15 */ +#endif move16(); } @@ -190,13 +196,21 @@ static void 
stereo_dft_generate_comfort_noise_fx( } ELSE { +#ifdef IMPROV_PRECISION + hStereoCng->cm_fx[b] = mac_r( L_mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), A_GFILT_FX, hStereoCng->cm_fx[b] ); /* Q15 */ +#else hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) ); /* Q15 */ +#endif move16(); } } ELSE { +#ifdef IMPROV_PRECISION + hStereoCng->cm_fx[b] = mac_r( L_mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), A_GFILT_FX, hStereoCng->cm_fx[b] ); /* Q15 */ +#else hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) ); /* Q15 */ +#endif move16(); } } @@ -228,6 +242,17 @@ static void stereo_dft_generate_comfort_noise_fx( } LR_ratio = extract_h( tdm_ratio_tabl_fx[hStereoCng->last_tdm_idx] ); /* Q15 */ +#ifdef IMPROV_PRECISION + tmp16 = shr( hStereoDft->g_state_fx[b], 2 ); + tmp_p = add( ONE_IN_Q13, tmp16 ); + tmp_s = sub( ONE_IN_Q13, tmp16 ); + tmp16 = mult( gamma, 2048 ); + + c = BASOP_Util_Divide3232_Scale( + L_mac( L_mult( tmp_p, tmp_p ), gamma, tmp16 ), + L_mac( L_mult( tmp_s, tmp_s ), gamma, tmp16 ), + &c_e ); +#else c = BASOP_Util_Divide3232_Scale( L_add( L_mult( add( ONE_IN_Q13, shr( hStereoDft->g_state_fx[b], 2 ) ), add( ONE_IN_Q13, shr( hStereoDft->g_state_fx[b], 2 ) ) ), @@ -236,6 +261,7 @@ static void stereo_dft_generate_comfort_noise_fx( sub( ONE_IN_Q13, shr( hStereoDft->g_state_fx[b], 2 ) ) ), L_shr( L_mult( gamma, gamma ), 4 ) ), &c_e ); +#endif q_sqrt = c_e; move16(); sqrt_res = Sqrt16( mult( c, hStereoCng->cm_fx[b] ), &q_sqrt ); diff --git a/lib_dec/ivas_stereo_dft_dec_fx.c b/lib_dec/ivas_stereo_dft_dec_fx.c index 9c1b488cb..195166e7e 100644 --- a/lib_dec/ivas_stereo_dft_dec_fx.c +++ b/lib_dec/ivas_stereo_dft_dec_fx.c @@ -3549,8 +3549,11 @@ void stereo_dft_dec_sid_coh_fx( pred_fx = add( pred_fx, shl( mult( ( *pptr_fx++ ), cohBandq_fx[i] ), 2 ) ); /*q-13*/ } /* Weighted intra/inter-frame prediction */ +#ifdef 
IMPROV_PRECISION + pred_fx = mac_r( L_mult( alpha_fx, pred_fx ), sub( 32767, alpha_fx ), shr( coh_fx[b], 2 ) ); /*q-13*/ +#else pred_fx = add( mult( alpha_fx, pred_fx ), mult( sub( 32767, alpha_fx ), shr( coh_fx[b], 2 ) ) ); /*q-13*/ - +#endif /* Read residual index from bitstream */ IF( LT_16( *nb_bits, nr_of_sid_stereo_bits ) ) /* If the bit limit is reached, res_index = 0 is assumed for remaining indices */ { diff --git a/lib_dec/ivas_stereo_ica_dec_fx.c b/lib_dec/ivas_stereo_ica_dec_fx.c index 694aad0b4..fa0da5cf7 100644 --- a/lib_dec/ivas_stereo_ica_dec_fx.c +++ b/lib_dec/ivas_stereo_ica_dec_fx.c @@ -201,7 +201,11 @@ void stereo_tca_dec_fx( IF( currentNCShift != 0 ) { +#ifdef IMPROV_PRECISION + currentNCShift = mac_r( L_mult( 19660 /* 0.6 in Q15 */, prevNCShift ), 13106 /* 0.4 in Q15 */, currentNCShift ); /* Q0 */ +#else currentNCShift = add( mult( 19660 /* 0.6 in Q15 */, prevNCShift ), mult( 13106 /* 0.4 in Q15 */, currentNCShift ) ); /* Q0 */ +#endif } prevNCShift = hStereoTCA->interp_dec_prevNCShift; /* Q0 */ -- GitLab From 0acc6afadb37f1645a716bcfb52676ccdd87b2e5 Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Thu, 20 Mar 2025 08:35:55 -0400 Subject: [PATCH 3/3] fix clang --- lib_dec/ivas_stereo_cng_dec.c | 6 +++--- lib_enc/fd_cng_enc_fx.c | 6 +++--- lib_enc/ivas_ism_param_enc_fx.c | 6 +++--- lib_enc/ivas_omasa_enc_fx.c | 2 +- lib_enc/ivas_td_low_rate_enc_fx.c | 10 +++++----- lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++---- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/lib_dec/ivas_stereo_cng_dec.c b/lib_dec/ivas_stereo_cng_dec.c index 1994f4875..2291fcea1 100644 --- a/lib_dec/ivas_stereo_cng_dec.c +++ b/lib_dec/ivas_stereo_cng_dec.c @@ -199,7 +199,7 @@ static void stereo_dft_generate_comfort_noise_fx( #ifdef IMPROV_PRECISION hStereoCng->cm_fx[b] = mac_r( L_mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), A_GFILT_FX, hStereoCng->cm_fx[b] ); /* Q15 */ #else - hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, 
hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) ); /* Q15 */ + hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) ); /* Q15 */ #endif move16(); } @@ -209,7 +209,7 @@ static void stereo_dft_generate_comfort_noise_fx( #ifdef IMPROV_PRECISION hStereoCng->cm_fx[b] = mac_r( L_mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), A_GFILT_FX, hStereoCng->cm_fx[b] ); /* Q15 */ #else - hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) ); /* Q15 */ + hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) ); /* Q15 */ #endif move16(); } @@ -247,7 +247,7 @@ static void stereo_dft_generate_comfort_noise_fx( tmp_p = add( ONE_IN_Q13, tmp16 ); tmp_s = sub( ONE_IN_Q13, tmp16 ); tmp16 = mult( gamma, 2048 ); - + c = BASOP_Util_Divide3232_Scale( L_mac( L_mult( tmp_p, tmp_p ), gamma, tmp16 ), L_mac( L_mult( tmp_s, tmp_s ), gamma, tmp16 ), diff --git a/lib_enc/fd_cng_enc_fx.c b/lib_enc/fd_cng_enc_fx.c index c690b6a76..328b81762 100644 --- a/lib_enc/fd_cng_enc_fx.c +++ b/lib_enc/fd_cng_enc_fx.c @@ -2737,9 +2737,9 @@ void stereoFdCngCoherence_fx( eL = BASOP_Util_Add_Mant32Exp( eL, eL_exp, L_mac( L_mult( pt_fftL[0], pt_fftL[0] ), pt_fftL[L_FFT / 2], pt_fftL[L_FFT / 2] ), shl( fft_exp, 1 ), &eL_exp ); /* exp(eL_exp) */ eR = BASOP_Util_Add_Mant32Exp( eR, eR_exp, L_mac( L_mult( pt_fftR[0], pt_fftR[0] ), pt_fftR[L_FFT / 2], pt_fftR[L_FFT / 2] ), shl( fft_exp, 1 ), &eR_exp ); /* exp(eR_exp) */ #else - cr = BASOP_Util_Add_Mant32Exp( cr, cr_exp, L_add( L_mult( pt_fftL[0], pt_fftR[0] ), L_mult( pt_fftL[L_FFT / 2], pt_fftR[L_FFT / 2] ) ), shl( fft_exp, 1 ), &cr_exp ); /* exp(cr_exp) */ - eL = BASOP_Util_Add_Mant32Exp( eL, eL_exp, L_add( L_mult( pt_fftL[0], pt_fftL[0] ), L_mult( pt_fftL[L_FFT / 2], pt_fftL[L_FFT / 2] ) ), shl( fft_exp, 1 ), &eL_exp ); /* exp(eL_exp) */ - eR = 
BASOP_Util_Add_Mant32Exp( eR, eR_exp, L_add( L_mult( pt_fftR[0], pt_fftR[0] ), L_mult( pt_fftR[L_FFT / 2], pt_fftR[L_FFT / 2] ) ), shl( fft_exp, 1 ), &eR_exp ); /* exp(eR_exp) */ + cr = BASOP_Util_Add_Mant32Exp( cr, cr_exp, L_add( L_mult( pt_fftL[0], pt_fftR[0] ), L_mult( pt_fftL[L_FFT / 2], pt_fftR[L_FFT / 2] ) ), shl( fft_exp, 1 ), &cr_exp ); /* exp(cr_exp) */ + eL = BASOP_Util_Add_Mant32Exp( eL, eL_exp, L_add( L_mult( pt_fftL[0], pt_fftL[0] ), L_mult( pt_fftL[L_FFT / 2], pt_fftL[L_FFT / 2] ) ), shl( fft_exp, 1 ), &eL_exp ); /* exp(eL_exp) */ + eR = BASOP_Util_Add_Mant32Exp( eR, eR_exp, L_add( L_mult( pt_fftR[0], pt_fftR[0] ), L_mult( pt_fftR[L_FFT / 2], pt_fftR[L_FFT / 2] ) ), shl( fft_exp, 1 ), &eR_exp ); /* exp(eR_exp) */ #endif FOR( i = 1; i < L_FFT / 2; i++ ) { diff --git a/lib_enc/ivas_ism_param_enc_fx.c b/lib_enc/ivas_ism_param_enc_fx.c index 9d9b27786..c46983578 100644 --- a/lib_enc/ivas_ism_param_enc_fx.c +++ b/lib_enc/ivas_ism_param_enc_fx.c @@ -298,7 +298,7 @@ void ivas_param_ism_stereo_dmx_fx( #ifdef IMPROV_PRECISION cardioid_left[i] = mac_r( L_mult( alpha, 16384 ), sub( ONE_IN_Q15 - 1, alpha ), getCosWord16( extract_l( tmp ) ) ); // Q14 #else - cardioid_left[i] = add( shr( alpha, 1 ), mult( sub( ONE_IN_Q15 - 1, alpha ), getCosWord16( extract_l( tmp ) ) ) ); // Q14 + cardioid_left[i] = add( shr( alpha, 1 ), mult( sub( ONE_IN_Q15 - 1, alpha ), getCosWord16( extract_l( tmp ) ) ) ); // Q14 #endif move16(); @@ -307,8 +307,8 @@ void ivas_param_ism_stereo_dmx_fx( Word16 last_cardioid_right; last_cardioid_right = sub( ONE_IN_Q14 /* 1.0f in Q14 */, last_cardioid_left ); /* Smoothing */ -#ifdef IMPROV_PRECISION - cardioid_left[i] = mac_r( L_mult( 24576 /* 0.75f in Q15 */, cardioid_left[i] ), 8192 /* 0.25f in Q15 */, last_cardioid_left ) ; // Q14 +#ifdef IMPROV_PRECISION + cardioid_left[i] = mac_r( L_mult( 24576 /* 0.75f in Q15 */, cardioid_left[i] ), 8192 /* 0.25f in Q15 */, last_cardioid_left ); // Q14 #else cardioid_left[i] = add( mult( 24576 /* 0.75f in Q15 
*/, cardioid_left[i] ), mult( 8192 /* 0.25f in Q15 */, last_cardioid_left ) ); // Q14 #endif diff --git a/lib_enc/ivas_omasa_enc_fx.c b/lib_enc/ivas_omasa_enc_fx.c index 855a505d9..b5b9102ea 100644 --- a/lib_enc/ivas_omasa_enc_fx.c +++ b/lib_enc/ivas_omasa_enc_fx.c @@ -1475,7 +1475,7 @@ static void ivas_omasa_dmx_fx( { g1 = interpolator[k]; move16(); - g2 = sub( MAX_WORD16, g1 ); /*q15*/ + g2 = sub( MAX_WORD16, g1 ); /*q15*/ #ifdef IMPROV_PRECISION_EVSBE data_out[j][k] = L_add( data_out[j][k], Mpy_32_32( L_mac( L_mult( g1, gains[j] ), g2, prev_gains[i][j] ) /*q31*/, data_in[i][k] ) ); /*Qx*/ #else diff --git a/lib_enc/ivas_td_low_rate_enc_fx.c b/lib_enc/ivas_td_low_rate_enc_fx.c index 29dd767bb..a7dd96486 100644 --- a/lib_enc/ivas_td_low_rate_enc_fx.c +++ b/lib_enc/ivas_td_low_rate_enc_fx.c @@ -352,9 +352,9 @@ void encod_gen_2sbfr( #ifndef FIX_1320_LOWRATE_ACELP hLPDmem->tilt_code = est_tilt_ivas_fx( exc + i_subfr, gain_pit, code, gain_code, &voice_fac, Q_new, 2 * L_SUBFR, 0 ); #else - Lgcode = L_shl_sat( gain_code, Q_new ); /* scaled gain_code with Qnew -> Q16*/ - gcode16 = round_fx_sat( Lgcode ); /*Q0*/ - hLPDmem->tilt_code = est_tilt_ivas_fx( exc + i_subfr, gain_pit, code, Lgcode, &voice_fac, Q_new, 2 * L_SUBFR, 0 ); /* Q15 */ + Lgcode = L_shl_sat( gain_code, Q_new ); /* scaled gain_code with Qnew -> Q16*/ + gcode16 = round_fx_sat( Lgcode ); /*Q0*/ + hLPDmem->tilt_code = est_tilt_ivas_fx( exc + i_subfr, gain_pit, code, Lgcode, &voice_fac, Q_new, 2 * L_SUBFR, 0 ); /* Q15 */ #endif move16(); @@ -365,8 +365,8 @@ void encod_gen_2sbfr( #ifndef FIX_1320_LOWRATE_ACELP hLPDmem->mem_w0 = sub( sub( xn[2 * L_SUBFR - 1], mult_r( gain_pit, y1[2 * L_SUBFR - 1] ) ), mult_r( extract_h( gain_code ), y2[2 * L_SUBFR - 1] ) ); #else - Ltmp = L_mult0( gcode16, y2[2 * L_SUBFR - 1] ); /*Q10*/ - Ltmp = L_shl( Ltmp, add( 5, shift ) ); /*Q15+shift*/ + Ltmp = L_mult0( gcode16, y2[2 * L_SUBFR - 1] ); /*Q10*/ + Ltmp = L_shl( Ltmp, add( 5, shift ) ); /*Q15+shift*/ Ltmp = L_negate( Ltmp ); 
Ltmp = L_mac( Ltmp, xn[2 * L_SUBFR - 1], 16384 /*Q14*/ ); /* Q_new-1+shift+14+1 */ Ltmp = L_msu( Ltmp, y1[2 * L_SUBFR - 1], gain_pit /*Q14*/ ); /* Q_new-1+shift+14+1 */ diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 70c52a210..f93885e97 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -2554,7 +2554,7 @@ static void ivas_dirac_dec_binaural_process_output_fx( #ifdef IMPROV_PRECISION gain = mac_r( L_mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxRePrev_fx[chA][chB][bin] ), interpVal_fx, hDiracDecBin->processMtxRe_fx[chA][chB][bin] ); // Q11 #else - gain = add( mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxRePrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxRe_fx[chA][chB][bin] ) ); // Q11 + gain = add( mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxRePrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxRe_fx[chA][chB][bin] ) ); // Q11 #endif outSlotRe_fx[bin] = Madd_32_16( outSlotRe_fx[bin], inRe_fx[chB][slot][bin], gain ); // q_inp_mix-4//q_result outSlotIm_fx[bin] = Madd_32_16( outSlotIm_fx[bin], inIm_fx[chB][slot][bin], gain ); // q_inp_mix-4//q_result @@ -2564,7 +2564,7 @@ static void ivas_dirac_dec_binaural_process_output_fx( #ifdef IMPROV_PRECISION_EVSBE gain = mac_r( L_mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxImPrev_fx[chA][chB][bin] ), interpVal_fx, hDiracDecBin->processMtxIm_fx[chA][chB][bin] ); // Q11 #else - gain = add( mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxImPrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxIm_fx[chA][chB][bin] ) ); // Q11 + gain = add( mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxImPrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxIm_fx[chA][chB][bin] ) ); // Q11 #endif // interpVal * hDiracDecBin->processMtxIm[chA][chB][bin]; outSlotRe_fx[bin] 
= Msub_32_16( outSlotRe_fx[bin], inIm_fx[chB][slot][bin], gain ); // q_inp_mix-4//q_result @@ -2594,8 +2594,8 @@ static void ivas_dirac_dec_binaural_process_output_fx( #else gain = add( mult( sub( 32767, interpVal_fx ), hDiracDecBin->processMtxDecImPrev_fx[chA][chB][bin] ), mult( interpVal_fx, hDiracDecBin->processMtxDecIm_fx[chA][chB][bin] ) ); // Q11 #endif - outSlotRe_fx[bin] = Msub_32_16( outSlotRe_fx[bin], decSlotImPointer_fx[bin], gain ); // q_inp_mix-4//q_result - outSlotIm_fx[bin] = Madd_32_16( outSlotIm_fx[bin], decSlotRePointer_fx[bin], gain ); // q_inp_mix-4//q_result + outSlotRe_fx[bin] = Msub_32_16( outSlotRe_fx[bin], decSlotImPointer_fx[bin], gain ); // q_inp_mix-4//q_result + outSlotIm_fx[bin] = Madd_32_16( outSlotIm_fx[bin], decSlotRePointer_fx[bin], gain ); // q_inp_mix-4//q_result move32(); move32(); } -- GitLab