From 01481ee722b00901dec204e0ad3a300a8e2f5ec1 Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Fri, 6 Feb 2026 14:16:22 -0500 Subject: [PATCH 1/5] acelp_fast optimisation --- lib_com/ivas_prot_fx.h | 6 ++ lib_com/options.h | 3 + lib_enc/cod4t64_fast_fx.c | 191 ++++++++++++++++++++++++++++++++++++-- lib_enc/inov_enc_fx.c | 8 ++ 4 files changed, 202 insertions(+), 6 deletions(-) diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index d87207a90..c82493fa7 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -3031,9 +3031,15 @@ void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dn */ +#ifdef OPT_241x_ACELP_FAST + const Word16 Q_dncn, /* i : scaling factor of dn and cn */ +#else Word16 Q_dn, +#endif const Word16 cn[L_SUBFR], /* i : residual after long term prediction q_cn*/ +#ifndef OPT_241x_ACELP_FAST const Word16 q_cn, +#endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation */ Word16 y[], /* o : filtered fixed codebook excitation */ diff --git a/lib_com/options.h b/lib_com/options.h index 61a3c10cc..51c477192 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -95,6 +95,9 @@ #define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ #define FIX_2392_MSAN_DESTROY_DEC /* VA: basop issue 2392: fix MSAN in ivas_destroy_dec_fx() */ +#define OPT_241x_ACELP_FAST /* before 0.32 1.338 4.291 2.524 total 107.916 -> 0.32 1.306 3.985 2.442*/ +//#define OPT_241x_ACELP_FAST_2 +//#define OPT_241x_ACELP_FAST_3 /* #################### End BE switches ################################## */ /* #################### Start NON-BE switches ############################ */ diff --git a/lib_enc/cod4t64_fast_fx.c b/lib_enc/cod4t64_fast_fx.c index 4bba58255..0c145b7c1 100644 --- a/lib_enc/cod4t64_fast_fx.c +++ b/lib_enc/cod4t64_fast_fx.c @@ -122,11 +122,20 @@ void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], +#ifdef OPT_241x_ACELP_FAST + /* i : corr. between target and h[]. */ // Q_dncn + const Word16 Q_dncn, +#else /* i : corr. between target and h[]. */ // Q_dn Word16 Q_dn, +#endif const Word16 cn[L_SUBFR], +#ifdef OPT_241x_ACELP_FAST + /* i : residual after long term prediction */ // Q_dncn +#else /* i : residual after long term prediction */ // q_cn const Word16 q_cn, +#endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter */ // e(norm_s(H[0])+1) Word16 code[L_SUBFR], @@ -163,7 +172,9 @@ void acelp_fast_fx( Word16 flag = 0; move16(); Word32 temp1, temp2, temp3, temp4, temp5, temp6; +#ifndef OPT_241x_ACELP_FAST Word16 q_temp1, q_temp2; +#endif Word16 scale_temp1, scale_temp2; /*-----------------------------------------------------------------* * Initialization @@ -294,9 +305,9 @@ void acelp_fast_fx( /*-----------------------------------------------------------------* * Find signal bn[] and sign pre-selection vector sign[]. *-----------------------------------------------------------------*/ - +#ifndef OPT_241x_ACELP_FAST exp = sub( Q31, shl( Q_dn, 1 ) ); - +#endif s64 = 0; move64(); FOR( i = 0; i < L_subfr; i++ ) @@ -310,8 +321,17 @@ void acelp_fast_fx( IF( s64 ) { Word16 new_exp1 = W_norm( s64 ); +#ifdef OPT_241x_ACELP_FAST_2 + dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // exp1 - 31 + dndn_e = sub( 63 - 1, new_exp1 ); +#else dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 +#ifndef OPT_241x_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), new_exp1 ), 32 ) ); +#else + dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); +#endif +#endif } @@ -336,8 +356,17 @@ void acelp_fast_fx( IF( s64 ) { Word16 new_exp1 = W_norm( s64 ); +#ifdef OPT_241x_ACELP_FAST_2 + cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // exp1 - 31 + cncn_track_e[q] = sub( 63 - 1, new_exp1 ); +#else cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 +#ifndef OPT_241x_ACELP_FAST cncn_track_e[q] = sub( 31, sub( add( add( shl( q_cn, 1 ), 1 ), new_exp1 ), 32 ) ); +#else + cncn_track_e[q] = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); +#endif +#endif } cncn_fx = BASOP_Util_Add_Mant32Exp( cncn_fx, cncn_e, cncn_track[q], cncn_track_e[q], &cncn_e ); // Q(cncn_e) } @@ -348,8 +377,14 @@ void acelp_fast_fx( tmp = add( tmp, sub( dndn_e, cncn_e ) ); s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // Q(15 - tmp) +#ifdef OPT_241x_ACELP_FAST + scale_temp1 = sub( Q16, tmp ); + /* Q_dn = q_cn and it doesn't matter */ +#else q_temp1 = add( add( sub( Q15, tmp ), q_cn ), Q1 ); scale_temp1 = sub( q_temp1, Q_dn ); +#endif + FOR( i = 0; i < L_subfr; i++ ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 @@ -360,6 +395,15 @@ void acelp_fast_fx( bn_orig_fx[i] = L_add( temp1, temp2 ); // Q_dn move32(); +#ifdef OPT_241x_ACELP_FAST + sign_fx[i] = -1; + move16(); + if( bn_orig_fx[i] >= 0 ) + { + sign_fx[i] = 1; + move16(); + } +#else IF( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; @@ -369,6 +413,7 @@ void acelp_fast_fx( sign_fx[i] = -1; } move16(); +#endif } /*-----------------------------------------------------------------* @@ -458,16 +503,49 @@ void acelp_fast_fx( test(); test(); /* skip certain tracks if number of pulses is lower than number of tracks */ +#ifdef OPT_241x_ACELP_FAST_3 IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { + max_track[NB_TRACK_FCB_4T - 3] = L_deposit_l( -1 ); + move32(); + max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); + move32(); + } + ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) + { + max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); + move32(); + } + + FOR( q = 0; q < nb_tracks; q++ ) + { + i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); + track_order[q] = i; + move16(); + max_track[i] = L_deposit_l( -1 ); + move32(); + } +#else + IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) + { +#ifndef OPT_241x_ACELP_FAST max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dn ); // Q_dn move32(); max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn +#else + max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dncn ); // Q_dn + move32(); + max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dncn ); // Q_dn +#endif move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { +#ifndef OPT_241x_ACELP_FAST max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn +#else + max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dncn ); // Q_dn +#endif move32(); } @@ -476,10 +554,14 @@ void acelp_fast_fx( i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); +#ifndef OPT_241x_ACELP_FAST max_track[i] = L_shl( -1, Q_dn ); // Q_dn +#else + max_track[i] = L_shl( -1, Q_dncn ); // Q_dn +#endif move32(); } - +#endif track_order[4] = track_order[1]; // Q0 move16(); track_order[5] = track_order[0]; // Q0 @@ -617,9 +699,13 @@ void acelp_fast_fx( move64(); FOR( i = track; i < L_subfr; i += nb_tracks ) { +#ifdef OPT_241x_ACELP_FAST + temp3 = L_msu0( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ); +#else temp1 = L_mult0( Gd, dn_orig[i] ); temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_sub( temp1, temp2 ); +#endif dn[i] = L_shr( temp3, 6 ); move32(); alp_pos0 += nb_tracks; @@ -627,8 +713,15 @@ void acelp_fast_fx( } exp1 = W_norm( s64 ); dndn_fx = W_extract_h( W_shl( s64, exp1 ) ); // 2 * Q_dyn + exp1 - 31 +#ifdef OPT_241x_ACELP_FAST_2 + dndn_e = sub( 63 - 1, exp1 ); +#else +#ifndef OPT_241x_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), exp1 ), 32 ) ); - +#else + dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), exp1 ), 32 ) ); +#endif +#endif IF( dndn_fx == 0 ) { dndn_fx = 214748365 /* 0.1f in Q31 */; @@ -645,10 +738,16 @@ void acelp_fast_fx( move16(); m[1] = track; // Q0 move16(); +#ifdef OPT_241x_ACELP_FAST + scale_temp1 = sub( Q16, exp1 ); + scale_temp2 = Q2; + move16(); +#else q_temp1 = add( add( sub( Q15, exp1 ), q_cn ), 1 ); q_temp2 = add( Q_dn, Q2 ); scale_temp1 = sub( q_temp1, Q_dn ); scale_temp2 = sub( q_temp2, Q_dn ); +#endif FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 @@ -683,7 +782,11 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[1], dn_orig[m[1]] ) ); // Q_dn Gd32 = Gd; move16(); +#ifdef OPT_241x_ACELP_FAST + Gd32 = L_add( Gd32, L_mac0( alp[0], i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ); // Q6 +#else Gd32 = L_add( Gd32, L_add( alp[0], L_mult0( i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ) ); // Q6 +#endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn @@ -697,10 +800,15 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); +#ifdef OPT_241x_ACELP_FAST + temp4 = L_msu0( temp1, G, *alp_pos0 ); + temp4 = L_msu0( temp4, G1, *alp_pos1 ); +#else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_sub( temp1, temp2 ); temp4 = L_sub( temp4, temp3 ); +#endif dn[i] = L_shr( temp4, 6 ); move32(); alp_pos0 += nb_tracks; @@ -720,9 +828,15 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[2], dn_orig[m[2]] ) ); // Q_dn temp1 = alp[0]; move32(); +#ifdef OPT_241x_ACELP_FAST + temp2 = L_mac0(temp1, i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); + temp3 = L_mac0(temp2, i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); + Gd32 = L_add( Gd32, temp3 ); // Q6 +#else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, L_add( L_add( temp1, temp2 ), temp3 ) ); // Q6 +#endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn @@ -739,12 +853,21 @@ void acelp_fast_fx( { temp1 = imult3216( Gd32, dn_orig[i] ); +#ifdef OPT_241x_ACELP_FAST + //temp2 = L_mult0( G, *alp_pos0 ); + //temp3 = L_mult0( G1, *alp_pos1 ); + //temp4 = L_mult0( G2, *alp_pos2 ); + temp5 = L_msu0( temp1, G, *alp_pos0 ); + temp5 = L_msu0( temp5, G1, *alp_pos1 ); + temp5 = L_msu0( temp5, G2, *alp_pos2 ); +#else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); temp5 = L_sub( temp1, temp2 ); temp5 = L_sub( temp5, temp3 ); temp5 = L_sub( temp5, temp4 ); +#endif dn[i] = L_shr( temp5, 6 ); move32(); alp_pos0 += nb_tracks; @@ -770,11 +893,19 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[3], dn_orig[m[3]] ) ); // Q_dn temp1 = alp[0]; move32(); +#ifdef OPT_241x_ACELP_FAST + temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); + temp2 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); + temp2 = L_mac0( temp2, i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); + + Gd32 = L_add( Gd32, temp2 ); // Q6 +#else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp4 = L_mult0( i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, L_add( L_add( L_add( temp1, temp2 ), temp3 ), temp4 ) ); // Q6 +#endif G = Gn; move16(); // Q_dn G1 = i_mult( G, s[1] ); // Q_dn @@ -795,6 +926,12 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); +#ifdef OPT_241x_ACELP_FAST + temp6 = L_msu0( temp1, G, *alp_pos0 ); + temp6 = L_msu0( temp6, G1, *alp_pos1 ); + temp6 = L_msu0( temp6, G2, *alp_pos2 ); + temp6 = L_msu0( temp6, G3, *alp_pos3 ); +#else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); @@ -803,6 +940,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); +#endif dn[i] = L_shr( temp6, 6 ); move32(); alp_pos0 += nb_tracks; @@ -824,6 +962,12 @@ void acelp_fast_fx( FOR( i = 0; i < L_subfr; i++ ) { temp1 = imult3216( Gd32, dn_orig[i] ); +#ifdef OPT_241x_ACELP_FAST + temp6 = L_msu0( temp1, G, *alp_pos0 ); + temp6 = L_msu0( temp6, G1, *alp_pos1 ); + temp6 = L_msu0( temp6, G2, *alp_pos2 ); + temp6 = L_msu0( temp6, G3, *alp_pos3 ); +#else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); @@ -832,6 +976,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); +#endif dn[i] = L_shr( temp6, 6 ); move16(); alp_pos0++; @@ -885,6 +1030,13 @@ void acelp_fast_fx( FOR( j = 0; j < nb_pulse; j++ ) { +#ifdef OPT_241x_ACELP_FAST + p_hn = h_inv - m[j]; + if( s[j] > 0 ) + { + p_hn = h - m[j]; + } +#else IF( s[j] > 0 ) { p_hn = h - m[j]; @@ -893,7 +1045,7 @@ void acelp_fast_fx( { p_hn = h_inv - m[j]; } - +#endif FOR( i = 0; i < L_subfr; i++ ) { y_tmp[i] = add_sat( y_tmp[i], *p_hn++ ); // q_H @@ -906,8 +1058,15 @@ void acelp_fast_fx( s64 = W_mult0_32_32( crit_num, crit_num ); // 2*Q_dn exp = W_norm( s64 ); crit_num = W_extract_h( W_shl( s64, exp ) ); // 2*Q_dn + exp - 32 +#ifdef OPT_241x_ACELP_FAST_2 + q_crit_num = sub( exp, 32 ); +#else +#ifndef OPT_241x_ACELP_FAST q_crit_num = add( shl( Q_dn, 1 ), sub( exp, 32 ) ); - +#else + q_crit_num = add( shl( Q_dncn, 1 ), sub( exp, 32 ) ); +#endif +#endif // crit_den = sum2_fx( y_tmp, L_subfr ); // 2*q_H s64 = 0; move64(); @@ -926,6 +1085,15 @@ void acelp_fast_fx( IF( GT_16( exp, exp1 ) ) { +#ifdef OPT_241x_ACELP_FAST + flag = 0; + move16(); + if ( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) + { + flag = 1; + move16(); + } +#else IF( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; @@ -936,9 +1104,19 @@ void acelp_fast_fx( flag = 0; move16(); } +#endif } ELSE { +#ifdef OPT_241x_ACELP_FAST + flag = 0; + move16(); + if( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) + { + flag = 1; + move16(); + } +#else IF( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; @@ -949,6 +1127,7 @@ void acelp_fast_fx( flag = 0; move16(); } +#endif } diff --git a/lib_enc/inov_enc_fx.c b/lib_enc/inov_enc_fx.c index 0efcf795b..67e996461 100644 --- a/lib_enc/inov_enc_fx.c +++ b/lib_enc/inov_enc_fx.c @@ -369,7 +369,11 @@ Word16 inov_encode_fx( } ELSE { +#ifdef OPT_241x_ACELP_FAST + acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, h2, code, y2, L_subfr ); +#else acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, Qcn, h2, code, y2, L_subfr ); +#endif } } ELSE IF( ( EQ_16( st_fx->idchan, 1 ) && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 7 ) ) || ( st_fx->idchan == 0 && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 3 ) ) ) @@ -380,7 +384,11 @@ Word16 inov_encode_fx( } ELSE { +#ifdef OPT_241x_ACELP_FAST + acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, h2, code, y2, L_SUBFR ); +#else acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, Qcn, h2, code, y2, L_SUBFR ); +#endif } } ELSE -- GitLab From 49539441eb55a9a8e945643957148a84be32f4df Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Fri, 6 Feb 2026 14:21:27 -0500 Subject: [PATCH 2/5] fix clang format --- lib_enc/cod4t64_fast_fx.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/lib_enc/cod4t64_fast_fx.c b/lib_enc/cod4t64_fast_fx.c index 0c145b7c1..30e88e8a3 100644 --- a/lib_enc/cod4t64_fast_fx.c +++ b/lib_enc/cod4t64_fast_fx.c @@ -131,7 +131,7 @@ void acelp_fast_fx( #endif const Word16 cn[L_SUBFR], #ifdef OPT_241x_ACELP_FAST - /* i : residual after long term prediction */ // Q_dncn +/* i : residual after long term prediction */ // Q_dncn #else /* i : residual after long term prediction */ // q_cn const Word16 q_cn, @@ -172,7 +172,7 @@ void acelp_fast_fx( Word16 flag = 0; move16(); Word32 temp1, temp2, temp3, temp4, temp5, temp6; -#ifndef OPT_241x_ACELP_FAST +#ifndef OPT_241x_ACELP_FAST Word16 q_temp1, q_temp2; #endif Word16 scale_temp1, scale_temp2; @@ -398,7 +398,7 @@ void acelp_fast_fx( #ifdef OPT_241x_ACELP_FAST sign_fx[i] = -1; move16(); - if( bn_orig_fx[i] >= 0 ) + if ( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; move16(); @@ -557,7 +557,7 @@ void acelp_fast_fx( #ifndef OPT_241x_ACELP_FAST max_track[i] = L_shl( -1, Q_dn ); // Q_dn #else - max_track[i] = L_shl( -1, Q_dncn ); // Q_dn + max_track[i] = L_shl( -1, Q_dncn ); // Q_dn #endif move32(); } @@ -787,7 +787,7 @@ void acelp_fast_fx( #else Gd32 = L_add( Gd32, L_add( alp[0], L_mult0( i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ) ); // Q6 #endif - G = Gn; // Q_dn + G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn G = i_mult( G, s[0] ); // Q_dn @@ -829,15 +829,15 @@ void acelp_fast_fx( temp1 = alp[0]; move32(); #ifdef OPT_241x_ACELP_FAST - temp2 = L_mac0(temp1, i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); - temp3 = L_mac0(temp2, i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); + temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); + temp3 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, temp3 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, L_add( L_add( temp1, temp2 ), temp3 ) ); // Q6 #endif - G = Gn; // Q_dn + G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn G2 = i_mult( G, s[2] ); // Q_dn @@ -854,9 +854,9 @@ void acelp_fast_fx( temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST - //temp2 = L_mult0( G, *alp_pos0 ); - //temp3 = L_mult0( G1, *alp_pos1 ); - //temp4 = L_mult0( G2, *alp_pos2 ); + // temp2 = L_mult0( G, *alp_pos0 ); + // temp3 = L_mult0( G1, *alp_pos1 ); + // temp4 = L_mult0( G2, *alp_pos2 ); temp5 = L_msu0( temp1, G, *alp_pos0 ); temp5 = L_msu0( temp5, G1, *alp_pos1 ); temp5 = L_msu0( temp5, G2, *alp_pos2 ); @@ -867,7 +867,7 @@ void acelp_fast_fx( temp5 = L_sub( temp1, temp2 ); temp5 = L_sub( temp5, temp3 ); temp5 = L_sub( temp5, temp4 ); -#endif +#endif dn[i] = L_shr( temp5, 6 ); move32(); alp_pos0 += nb_tracks; @@ -976,7 +976,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); -#endif +#endif dn[i] = L_shr( temp6, 6 ); move16(); alp_pos0++; @@ -1032,7 +1032,7 @@ void acelp_fast_fx( { #ifdef OPT_241x_ACELP_FAST p_hn = h_inv - m[j]; - if( s[j] > 0 ) + if ( s[j] > 0 ) { p_hn = h - m[j]; } @@ -1111,7 +1111,7 @@ void acelp_fast_fx( #ifdef OPT_241x_ACELP_FAST flag = 0; move16(); - if( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) + if ( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; move16(); -- GitLab From 7beda9d2d0ed10e4e7be96236f3105b347e28596 Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Fri, 6 Feb 2026 14:32:36 -0500 Subject: [PATCH 3/5] activation of a second switch --- lib_com/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/options.h b/lib_com/options.h index 51c477192..7d98d6d33 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -96,7 +96,7 @@ #define FIX_2392_MSAN_DESTROY_DEC /* VA: basop issue 2392: fix MSAN in ivas_destroy_dec_fx() */ #define OPT_241x_ACELP_FAST /* before 0.32 1.338 4.291 2.524 total 107.916 -> 0.32 1.306 3.985 2.442*/ -//#define OPT_241x_ACELP_FAST_2 +#define OPT_241x_ACELP_FAST_2 //#define OPT_241x_ACELP_FAST_3 /* #################### End BE switches ################################## */ -- GitLab From bca73a0363fbde0ce503c328b97a9f7044a7b764 Mon Sep 17 00:00:00 2001 From: Tommy Vaillancourt Date: Fri, 6 Feb 2026 15:04:12 -0500 Subject: [PATCH 4/5] cleanup --- lib_com/options.h | 5 ++--- lib_enc/cod4t64_fast_fx.c | 34 +--------------------------------- 2 files changed, 3 insertions(+), 36 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 7d98d6d33..43c57fdc1 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -95,9 +95,8 @@ #define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ #define FIX_2392_MSAN_DESTROY_DEC /* VA: basop issue 2392: fix MSAN in ivas_destroy_dec_fx() */ -#define OPT_241x_ACELP_FAST /* before 0.32 1.338 4.291 2.524 total 107.916 -> 0.32 1.306 3.985 2.442*/ -#define OPT_241x_ACELP_FAST_2 -//#define OPT_241x_ACELP_FAST_3 +#define OPT_241x_ACELP_FAST /* VA: basop issue 2426, optimisation of acelp_fast_fx ( reduc. compl. by 0.35 wmops ) */ + /* #################### End BE switches ################################## */ /* #################### Start NON-BE switches ############################ */ diff --git a/lib_enc/cod4t64_fast_fx.c b/lib_enc/cod4t64_fast_fx.c index 30e88e8a3..dafd72bb5 100644 --- a/lib_enc/cod4t64_fast_fx.c +++ b/lib_enc/cod4t64_fast_fx.c @@ -321,16 +321,11 @@ void acelp_fast_fx( IF( s64 ) { Word16 new_exp1 = W_norm( s64 ); -#ifdef OPT_241x_ACELP_FAST_2 - dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // exp1 - 31 - dndn_e = sub( 63 - 1, new_exp1 ); -#else dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_241x_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), new_exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); -#endif #endif } @@ -356,16 +351,11 @@ void acelp_fast_fx( IF( s64 ) { Word16 new_exp1 = W_norm( s64 ); -#ifdef OPT_241x_ACELP_FAST_2 - cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // exp1 - 31 - cncn_track_e[q] = sub( 63 - 1, new_exp1 ); -#else cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_241x_ACELP_FAST cncn_track_e[q] = sub( 31, sub( add( add( shl( q_cn, 1 ), 1 ), new_exp1 ), 32 ) ); #else cncn_track_e[q] = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); -#endif #endif } cncn_fx = BASOP_Util_Add_Mant32Exp( cncn_fx, cncn_e, cncn_track[q], cncn_track_e[q], &cncn_e ); // Q(cncn_e) @@ -503,7 +493,7 @@ void acelp_fast_fx( test(); test(); /* skip certain tracks if number of pulses is lower than number of tracks */ -#ifdef OPT_241x_ACELP_FAST_3 +#ifdef OPT_241x_ACELP_FAST /* Just need a negative number, it doesn't need to be scaled */ IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { max_track[NB_TRACK_FCB_4T - 3] = L_deposit_l( -1 ); @@ -528,24 +518,14 @@ void acelp_fast_fx( #else IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { -#ifndef OPT_241x_ACELP_FAST max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dn ); // Q_dn move32(); max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn -#else - max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dncn ); // Q_dn - move32(); - max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dncn ); // Q_dn -#endif move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { -#ifndef OPT_241x_ACELP_FAST max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn -#else - max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dncn ); // Q_dn -#endif move32(); } @@ -554,11 +534,7 @@ void acelp_fast_fx( i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); -#ifndef OPT_241x_ACELP_FAST max_track[i] = L_shl( -1, Q_dn ); // Q_dn -#else - max_track[i] = L_shl( -1, Q_dncn ); // Q_dn -#endif move32(); } #endif @@ -713,14 +689,10 @@ void acelp_fast_fx( } exp1 = W_norm( s64 ); dndn_fx = W_extract_h( W_shl( s64, exp1 ) ); // 2 * Q_dyn + exp1 - 31 -#ifdef OPT_241x_ACELP_FAST_2 - dndn_e = sub( 63 - 1, exp1 ); -#else #ifndef OPT_241x_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), exp1 ), 32 ) ); -#endif #endif IF( dndn_fx == 0 ) { @@ -1058,14 +1030,10 @@ void acelp_fast_fx( s64 = W_mult0_32_32( crit_num, crit_num ); // 2*Q_dn exp = W_norm( s64 ); crit_num = W_extract_h( W_shl( s64, exp ) ); // 2*Q_dn + exp - 32 -#ifdef OPT_241x_ACELP_FAST_2 - q_crit_num = sub( exp, 32 ); -#else #ifndef OPT_241x_ACELP_FAST q_crit_num = add( shl( Q_dn, 1 ), sub( exp, 32 ) ); #else q_crit_num = add( shl( Q_dncn, 1 ), sub( exp, 32 ) ); -#endif #endif // crit_den = sum2_fx( y_tmp, L_subfr ); // 2*q_H s64 = 0; -- GitLab From 71868b3ed257912dd523a73cad82220e66a4fde2 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 10 Feb 2026 12:26:22 +0100 Subject: [PATCH 5/5] clang-format --- lib_enc/cod4t64_fast_fx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib_enc/cod4t64_fast_fx.c b/lib_enc/cod4t64_fast_fx.c index 5ff684e86..66378d261 100644 --- a/lib_enc/cod4t64_fast_fx.c +++ b/lib_enc/cod4t64_fast_fx.c @@ -119,22 +119,22 @@ static Word16 find_best_pulse_fx( *-------------------------------------------------------------------*/ void acelp_fast_fx( - BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ - const Word16 cdk_index, /* i : codebook index */ - const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dncn */ + BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ + const Word16 cdk_index, /* i : codebook index */ + const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dncn */ #ifdef OPT_2416_ACELP_FAST - const Word16 Q_dncn, /* i : scaling factor of dn and cn */ + const Word16 Q_dncn, /* i : scaling factor of dn and cn */ #else Word16 Q_dn, #endif - const Word16 cn[L_SUBFR], /* i : residual after long term prediction Q_dncn */ + const Word16 cn[L_SUBFR], /* i : residual after long term prediction Q_dncn */ #ifndef OPT_2416_ACELP_FAST const Word16 q_cn, #endif - const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ - Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation Q0 */ - Word16 y[], /* o : filtered fixed codebook excitation e(norm_s(H[0])+1) */ - const Word16 L_subfr /* i : subframe length */ + const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ + Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation Q0 */ + Word16 y[], /* o : filtered fixed codebook excitation e(norm_s(H[0])+1) */ + const Word16 L_subfr /* i : subframe length */ ) { Word16 i, j, q, bits, bits_track, nb_pos, nb_pulse, track, nb_iter, nb_tracks; -- GitLab