Loading lib_com/ivas_prot_fx.h +6 −0 Original line number Diff line number Diff line Loading @@ -3031,9 +3031,15 @@ void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dn */ #ifdef OPT_241x_ACELP_FAST const Word16 Q_dncn, /* i : scaling factor of dn and cn */ #else Word16 Q_dn, #endif const Word16 cn[L_SUBFR], /* i : residual after long term prediction q_cn*/ #ifndef OPT_241x_ACELP_FAST const Word16 q_cn, #endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation */ Word16 y[], /* o : filtered fixed codebook excitation */ Loading lib_com/options.h +3 −0 Original line number Diff line number Diff line Loading @@ -95,6 +95,9 @@ #define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ #define FIX_2392_MSAN_DESTROY_DEC /* VA: basop issue 2392: fix MSAN in ivas_destroy_dec_fx() */ #define OPT_241x_ACELP_FAST /* before 0.32 1.338 4.291 2.524 total 107.916 -> 0.32 1.306 3.985 2.442*/ //#define OPT_241x_ACELP_FAST_2 //#define OPT_241x_ACELP_FAST_3 /* #################### End BE switches ################################## */ /* #################### Start NON-BE switches ############################ */ Loading lib_enc/cod4t64_fast_fx.c +185 −6 Original line number Diff line number Diff line Loading @@ -122,11 +122,20 @@ void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], #ifdef OPT_241x_ACELP_FAST /* i : corr. between target and h[]. */ // Q_dncn const Word16 Q_dncn, #else /* i : corr. between target and h[]. */ // Q_dn Word16 Q_dn, #endif const Word16 cn[L_SUBFR], #ifdef OPT_241x_ACELP_FAST /* i : residual after long term prediction */ // Q_dncn #else /* i : residual after long term prediction */ // q_cn const Word16 q_cn, #endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter */ // e(norm_s(H[0])+1) Word16 code[L_SUBFR], Loading Loading @@ -163,7 +172,9 @@ void acelp_fast_fx( Word16 flag = 0; move16(); Word32 temp1, temp2, temp3, temp4, temp5, temp6; #ifndef OPT_241x_ACELP_FAST Word16 q_temp1, q_temp2; #endif Word16 scale_temp1, scale_temp2; /*-----------------------------------------------------------------* * Initialization Loading Loading @@ -294,9 +305,9 @@ void acelp_fast_fx( /*-----------------------------------------------------------------* * Find signal bn[] and sign pre-selection vector sign[]. *-----------------------------------------------------------------*/ #ifndef OPT_241x_ACELP_FAST exp = sub( Q31, shl( Q_dn, 1 ) ); #endif s64 = 0; move64(); FOR( i = 0; i < L_subfr; i++ ) Loading @@ -310,8 +321,17 @@ void acelp_fast_fx( IF( s64 ) { Word16 new_exp1 = W_norm( s64 ); #ifdef OPT_241x_ACELP_FAST_2 dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // exp1 - 31 dndn_e = sub( 63 - 1, new_exp1 ); #else dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_241x_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), new_exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); #endif #endif } Loading @@ -336,8 +356,17 @@ void acelp_fast_fx( IF( s64 ) { Word16 new_exp1 = W_norm( s64 ); #ifdef OPT_241x_ACELP_FAST_2 cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // exp1 - 31 cncn_track_e[q] = sub( 63 - 1, new_exp1 ); #else cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_241x_ACELP_FAST cncn_track_e[q] = sub( 31, sub( add( add( shl( q_cn, 1 ), 1 ), new_exp1 ), 32 ) ); #else cncn_track_e[q] = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); #endif #endif } cncn_fx = BASOP_Util_Add_Mant32Exp( cncn_fx, cncn_e, cncn_track[q], cncn_track_e[q], &cncn_e ); // Q(cncn_e) } Loading @@ -348,8 +377,14 @@ void acelp_fast_fx( tmp = add( tmp, sub( dndn_e, cncn_e ) ); s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // Q(15 - tmp) #ifdef OPT_241x_ACELP_FAST scale_temp1 = sub( Q16, tmp ); /* Q_dn = q_cn and it doesn't matter */ #else q_temp1 = add( add( sub( Q15, tmp ), q_cn ), Q1 ); scale_temp1 = sub( q_temp1, Q_dn ); #endif FOR( i = 0; i < L_subfr; i++ ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 Loading @@ -360,6 +395,15 @@ void acelp_fast_fx( bn_orig_fx[i] = L_add( temp1, temp2 ); // Q_dn move32(); #ifdef OPT_241x_ACELP_FAST sign_fx[i] = -1; move16(); if( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; move16(); } #else IF( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; Loading @@ -369,6 +413,7 @@ void acelp_fast_fx( sign_fx[i] = -1; } move16(); #endif } /*-----------------------------------------------------------------* Loading Loading @@ -458,16 +503,49 @@ void acelp_fast_fx( test(); test(); /* skip certain tracks if number of pulses is lower than number of tracks */ #ifdef OPT_241x_ACELP_FAST_3 IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { max_track[NB_TRACK_FCB_4T - 3] = L_deposit_l( -1 ); move32(); max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); move32(); } FOR( q = 0; q < nb_tracks; q++ ) { i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); max_track[i] = L_deposit_l( -1 ); move32(); } #else IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { #ifndef OPT_241x_ACELP_FAST max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dn ); // Q_dn move32(); max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn #else max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dncn ); // Q_dn move32(); max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dncn ); // Q_dn #endif move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { #ifndef OPT_241x_ACELP_FAST max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn #else max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dncn ); // Q_dn #endif move32(); } Loading @@ -476,10 +554,14 @@ void acelp_fast_fx( i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); #ifndef OPT_241x_ACELP_FAST max_track[i] = L_shl( -1, Q_dn ); // Q_dn #else max_track[i] = L_shl( -1, Q_dncn ); // Q_dn #endif move32(); } #endif track_order[4] = track_order[1]; // Q0 move16(); track_order[5] = track_order[0]; // Q0 Loading Loading @@ -617,9 +699,13 @@ void acelp_fast_fx( move64(); FOR( i = track; i < L_subfr; i += nb_tracks ) { #ifdef OPT_241x_ACELP_FAST temp3 = L_msu0( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ); #else temp1 = L_mult0( Gd, dn_orig[i] ); temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_sub( temp1, temp2 ); #endif dn[i] = L_shr( temp3, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -627,8 +713,15 @@ void acelp_fast_fx( } exp1 = W_norm( s64 ); dndn_fx = W_extract_h( W_shl( s64, exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifdef OPT_241x_ACELP_FAST_2 dndn_e = sub( 63 - 1, exp1 ); #else #ifndef OPT_241x_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), exp1 ), 32 ) ); #endif #endif IF( dndn_fx == 0 ) { dndn_fx = 214748365 /* 0.1f in Q31 */; Loading @@ -645,10 +738,16 @@ void acelp_fast_fx( move16(); m[1] = track; // Q0 move16(); #ifdef OPT_241x_ACELP_FAST scale_temp1 = sub( Q16, exp1 ); scale_temp2 = Q2; move16(); #else q_temp1 = add( add( sub( Q15, exp1 ), q_cn ), 1 ); q_temp2 = add( Q_dn, Q2 ); scale_temp1 = sub( q_temp1, Q_dn ); scale_temp2 = sub( q_temp2, Q_dn ); #endif FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 Loading Loading @@ -683,7 +782,11 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[1], dn_orig[m[1]] ) ); // Q_dn Gd32 = Gd; move16(); #ifdef OPT_241x_ACELP_FAST Gd32 = L_add( Gd32, L_mac0( alp[0], i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ); // Q6 #else Gd32 = L_add( Gd32, L_add( alp[0], L_mult0( i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ) ); // Q6 #endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -697,10 +800,15 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST temp4 = L_msu0( temp1, G, *alp_pos0 ); temp4 = L_msu0( temp4, G1, *alp_pos1 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_sub( temp1, temp2 ); temp4 = L_sub( temp4, temp3 ); #endif dn[i] = L_shr( temp4, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -720,9 +828,15 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[2], dn_orig[m[2]] ) ); // Q_dn temp1 = alp[0]; move32(); #ifdef OPT_241x_ACELP_FAST temp2 = L_mac0(temp1, i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mac0(temp2, i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, temp3 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, L_add( L_add( temp1, temp2 ), temp3 ) ); // Q6 #endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -739,12 +853,21 @@ void acelp_fast_fx( { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST //temp2 = L_mult0( G, *alp_pos0 ); //temp3 = L_mult0( G1, *alp_pos1 ); //temp4 = L_mult0( G2, *alp_pos2 ); temp5 = L_msu0( temp1, G, *alp_pos0 ); temp5 = L_msu0( temp5, G1, *alp_pos1 ); temp5 = L_msu0( temp5, G2, *alp_pos2 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); temp5 = L_sub( temp1, temp2 ); temp5 = L_sub( temp5, temp3 ); temp5 = L_sub( temp5, temp4 ); #endif dn[i] = L_shr( temp5, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -770,11 +893,19 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[3], dn_orig[m[3]] ) ); // Q_dn temp1 = alp[0]; move32(); #ifdef OPT_241x_ACELP_FAST temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp2 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp2 = L_mac0( temp2, i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, temp2 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp4 = L_mult0( i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, L_add( L_add( L_add( temp1, temp2 ), temp3 ), temp4 ) ); // Q6 #endif G = Gn; move16(); // Q_dn G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -795,6 +926,12 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST temp6 = L_msu0( temp1, G, *alp_pos0 ); temp6 = L_msu0( temp6, G1, *alp_pos1 ); temp6 = L_msu0( temp6, G2, *alp_pos2 ); temp6 = L_msu0( temp6, G3, *alp_pos3 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); Loading @@ -803,6 +940,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); #endif dn[i] = L_shr( temp6, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -824,6 +962,12 @@ void acelp_fast_fx( FOR( i = 0; i < L_subfr; i++ ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST temp6 = L_msu0( temp1, G, *alp_pos0 ); temp6 = L_msu0( temp6, G1, *alp_pos1 ); temp6 = L_msu0( temp6, G2, *alp_pos2 ); temp6 = L_msu0( temp6, G3, *alp_pos3 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); Loading @@ -832,6 +976,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); #endif dn[i] = L_shr( temp6, 6 ); move16(); alp_pos0++; Loading Loading @@ -885,6 +1030,13 @@ void acelp_fast_fx( FOR( j = 0; j < nb_pulse; j++ ) { #ifdef OPT_241x_ACELP_FAST p_hn = h_inv - m[j]; if( s[j] > 0 ) { p_hn = h - m[j]; } #else IF( s[j] > 0 ) { p_hn = h - m[j]; Loading @@ -893,7 +1045,7 @@ void acelp_fast_fx( { p_hn = h_inv - m[j]; } #endif FOR( i = 0; i < L_subfr; i++ ) { y_tmp[i] = add_sat( y_tmp[i], *p_hn++ ); // q_H Loading @@ -906,8 +1058,15 @@ void acelp_fast_fx( s64 = W_mult0_32_32( crit_num, crit_num ); // 2*Q_dn exp = W_norm( s64 ); crit_num = W_extract_h( W_shl( s64, exp ) ); // 2*Q_dn + exp - 32 #ifdef OPT_241x_ACELP_FAST_2 q_crit_num = sub( exp, 32 ); #else #ifndef OPT_241x_ACELP_FAST q_crit_num = add( shl( Q_dn, 1 ), sub( exp, 32 ) ); #else q_crit_num = add( shl( Q_dncn, 1 ), sub( exp, 32 ) ); #endif #endif // crit_den = sum2_fx( y_tmp, L_subfr ); // 2*q_H s64 = 0; move64(); Loading @@ -926,6 +1085,15 @@ void acelp_fast_fx( IF( GT_16( exp, exp1 ) ) { #ifdef OPT_241x_ACELP_FAST flag = 0; move16(); if ( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; move16(); } #else IF( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; Loading @@ -936,9 +1104,19 @@ void acelp_fast_fx( flag = 0; move16(); } #endif } ELSE { #ifdef OPT_241x_ACELP_FAST flag = 0; move16(); if( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; move16(); } #else IF( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; Loading @@ -949,6 +1127,7 @@ void acelp_fast_fx( flag = 0; move16(); } #endif } Loading lib_enc/inov_enc_fx.c +8 −0 Original line number Diff line number Diff line Loading @@ -369,7 +369,11 @@ Word16 inov_encode_fx( } ELSE { #ifdef OPT_241x_ACELP_FAST acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, h2, code, y2, L_subfr ); #else acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, Qcn, h2, code, y2, L_subfr ); #endif } } ELSE IF( ( EQ_16( st_fx->idchan, 1 ) && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 7 ) ) || ( st_fx->idchan == 0 && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 3 ) ) ) Loading @@ -380,7 +384,11 @@ Word16 inov_encode_fx( } ELSE { #ifdef OPT_241x_ACELP_FAST acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, h2, code, y2, L_SUBFR ); #else acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, Qcn, h2, code, y2, L_SUBFR ); #endif } } ELSE Loading Loading
lib_com/ivas_prot_fx.h +6 −0 Original line number Diff line number Diff line Loading @@ -3031,9 +3031,15 @@ void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dn */ #ifdef OPT_241x_ACELP_FAST const Word16 Q_dncn, /* i : scaling factor of dn and cn */ #else Word16 Q_dn, #endif const Word16 cn[L_SUBFR], /* i : residual after long term prediction q_cn*/ #ifndef OPT_241x_ACELP_FAST const Word16 q_cn, #endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation */ Word16 y[], /* o : filtered fixed codebook excitation */ Loading
lib_com/options.h +3 −0 Original line number Diff line number Diff line Loading @@ -95,6 +95,9 @@ #define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ #define FIX_2392_MSAN_DESTROY_DEC /* VA: basop issue 2392: fix MSAN in ivas_destroy_dec_fx() */ #define OPT_241x_ACELP_FAST /* before 0.32 1.338 4.291 2.524 total 107.916 -> 0.32 1.306 3.985 2.442*/ //#define OPT_241x_ACELP_FAST_2 //#define OPT_241x_ACELP_FAST_3 /* #################### End BE switches ################################## */ /* #################### Start NON-BE switches ############################ */ Loading
lib_enc/cod4t64_fast_fx.c +185 −6 Original line number Diff line number Diff line Loading @@ -122,11 +122,20 @@ void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], #ifdef OPT_241x_ACELP_FAST /* i : corr. between target and h[]. */ // Q_dncn const Word16 Q_dncn, #else /* i : corr. between target and h[]. */ // Q_dn Word16 Q_dn, #endif const Word16 cn[L_SUBFR], #ifdef OPT_241x_ACELP_FAST /* i : residual after long term prediction */ // Q_dncn #else /* i : residual after long term prediction */ // q_cn const Word16 q_cn, #endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter */ // e(norm_s(H[0])+1) Word16 code[L_SUBFR], Loading Loading @@ -163,7 +172,9 @@ void acelp_fast_fx( Word16 flag = 0; move16(); Word32 temp1, temp2, temp3, temp4, temp5, temp6; #ifndef OPT_241x_ACELP_FAST Word16 q_temp1, q_temp2; #endif Word16 scale_temp1, scale_temp2; /*-----------------------------------------------------------------* * Initialization Loading Loading @@ -294,9 +305,9 @@ void acelp_fast_fx( /*-----------------------------------------------------------------* * Find signal bn[] and sign pre-selection vector sign[]. *-----------------------------------------------------------------*/ #ifndef OPT_241x_ACELP_FAST exp = sub( Q31, shl( Q_dn, 1 ) ); #endif s64 = 0; move64(); FOR( i = 0; i < L_subfr; i++ ) Loading @@ -310,8 +321,17 @@ void acelp_fast_fx( IF( s64 ) { Word16 new_exp1 = W_norm( s64 ); #ifdef OPT_241x_ACELP_FAST_2 dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // exp1 - 31 dndn_e = sub( 63 - 1, new_exp1 ); #else dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_241x_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), new_exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); #endif #endif } Loading @@ -336,8 +356,17 @@ void acelp_fast_fx( IF( s64 ) { Word16 new_exp1 = W_norm( s64 ); #ifdef OPT_241x_ACELP_FAST_2 cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // exp1 - 31 cncn_track_e[q] = sub( 63 - 1, new_exp1 ); #else cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_241x_ACELP_FAST cncn_track_e[q] = sub( 31, sub( add( add( shl( q_cn, 1 ), 1 ), new_exp1 ), 32 ) ); #else cncn_track_e[q] = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); #endif #endif } cncn_fx = BASOP_Util_Add_Mant32Exp( cncn_fx, cncn_e, cncn_track[q], cncn_track_e[q], &cncn_e ); // Q(cncn_e) } Loading @@ -348,8 +377,14 @@ void acelp_fast_fx( tmp = add( tmp, sub( dndn_e, cncn_e ) ); s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // Q(15 - tmp) #ifdef OPT_241x_ACELP_FAST scale_temp1 = sub( Q16, tmp ); /* Q_dn = q_cn and it doesn't matter */ #else q_temp1 = add( add( sub( Q15, tmp ), q_cn ), Q1 ); scale_temp1 = sub( q_temp1, Q_dn ); #endif FOR( i = 0; i < L_subfr; i++ ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 Loading @@ -360,6 +395,15 @@ void acelp_fast_fx( bn_orig_fx[i] = L_add( temp1, temp2 ); // Q_dn move32(); #ifdef OPT_241x_ACELP_FAST sign_fx[i] = -1; move16(); if( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; move16(); } #else IF( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; Loading @@ -369,6 +413,7 @@ void acelp_fast_fx( sign_fx[i] = -1; } move16(); #endif } /*-----------------------------------------------------------------* Loading Loading @@ -458,16 +503,49 @@ void acelp_fast_fx( test(); test(); /* skip certain tracks if number of pulses is lower than number of tracks */ #ifdef OPT_241x_ACELP_FAST_3 IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { max_track[NB_TRACK_FCB_4T - 3] = L_deposit_l( -1 ); move32(); max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); move32(); } FOR( q = 0; q < nb_tracks; q++ ) { i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); max_track[i] = L_deposit_l( -1 ); move32(); } #else IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { #ifndef OPT_241x_ACELP_FAST max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dn ); // Q_dn move32(); max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn #else max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dncn ); // Q_dn move32(); max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dncn ); // Q_dn #endif move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { #ifndef OPT_241x_ACELP_FAST max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dn ); // Q_dn #else max_track[NB_TRACK_FCB_4T - 1] = L_shl( -1, Q_dncn ); // Q_dn #endif move32(); } Loading @@ -476,10 +554,14 @@ void acelp_fast_fx( i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); #ifndef OPT_241x_ACELP_FAST max_track[i] = L_shl( -1, Q_dn ); // Q_dn #else max_track[i] = L_shl( -1, Q_dncn ); // Q_dn #endif move32(); } #endif track_order[4] = track_order[1]; // Q0 move16(); track_order[5] = track_order[0]; // Q0 Loading Loading @@ -617,9 +699,13 @@ void acelp_fast_fx( move64(); FOR( i = track; i < L_subfr; i += nb_tracks ) { #ifdef OPT_241x_ACELP_FAST temp3 = L_msu0( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ); #else temp1 = L_mult0( Gd, dn_orig[i] ); temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_sub( temp1, temp2 ); #endif dn[i] = L_shr( temp3, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -627,8 +713,15 @@ void acelp_fast_fx( } exp1 = W_norm( s64 ); dndn_fx = W_extract_h( W_shl( s64, exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifdef OPT_241x_ACELP_FAST_2 dndn_e = sub( 63 - 1, exp1 ); #else #ifndef OPT_241x_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), exp1 ), 32 ) ); #endif #endif IF( dndn_fx == 0 ) { dndn_fx = 214748365 /* 0.1f in Q31 */; Loading @@ -645,10 +738,16 @@ void acelp_fast_fx( move16(); m[1] = track; // Q0 move16(); #ifdef OPT_241x_ACELP_FAST scale_temp1 = sub( Q16, exp1 ); scale_temp2 = Q2; move16(); #else q_temp1 = add( add( sub( Q15, exp1 ), q_cn ), 1 ); q_temp2 = add( Q_dn, Q2 ); scale_temp1 = sub( q_temp1, Q_dn ); scale_temp2 = sub( q_temp2, Q_dn ); #endif FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 Loading Loading @@ -683,7 +782,11 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[1], dn_orig[m[1]] ) ); // Q_dn Gd32 = Gd; move16(); #ifdef OPT_241x_ACELP_FAST Gd32 = L_add( Gd32, L_mac0( alp[0], i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ); // Q6 #else Gd32 = L_add( Gd32, L_add( alp[0], L_mult0( i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ) ); // Q6 #endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -697,10 +800,15 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST temp4 = L_msu0( temp1, G, *alp_pos0 ); temp4 = L_msu0( temp4, G1, *alp_pos1 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_sub( temp1, temp2 ); temp4 = L_sub( temp4, temp3 ); #endif dn[i] = L_shr( temp4, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -720,9 +828,15 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[2], dn_orig[m[2]] ) ); // Q_dn temp1 = alp[0]; move32(); #ifdef OPT_241x_ACELP_FAST temp2 = L_mac0(temp1, i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mac0(temp2, i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, temp3 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, L_add( L_add( temp1, temp2 ), temp3 ) ); // Q6 #endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -739,12 +853,21 @@ void acelp_fast_fx( { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST //temp2 = L_mult0( G, *alp_pos0 ); //temp3 = L_mult0( G1, *alp_pos1 ); //temp4 = L_mult0( G2, *alp_pos2 ); temp5 = L_msu0( temp1, G, *alp_pos0 ); temp5 = L_msu0( temp5, G1, *alp_pos1 ); temp5 = L_msu0( temp5, G2, *alp_pos2 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); temp5 = L_sub( temp1, temp2 ); temp5 = L_sub( temp5, temp3 ); temp5 = L_sub( temp5, temp4 ); #endif dn[i] = L_shr( temp5, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -770,11 +893,19 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[3], dn_orig[m[3]] ) ); // Q_dn temp1 = alp[0]; move32(); #ifdef OPT_241x_ACELP_FAST temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp2 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp2 = L_mac0( temp2, i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, temp2 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp4 = L_mult0( i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, L_add( L_add( L_add( temp1, temp2 ), temp3 ), temp4 ) ); // Q6 #endif G = Gn; move16(); // Q_dn G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -795,6 +926,12 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST temp6 = L_msu0( temp1, G, *alp_pos0 ); temp6 = L_msu0( temp6, G1, *alp_pos1 ); temp6 = L_msu0( temp6, G2, *alp_pos2 ); temp6 = L_msu0( temp6, G3, *alp_pos3 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); Loading @@ -803,6 +940,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); #endif dn[i] = L_shr( temp6, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -824,6 +962,12 @@ void acelp_fast_fx( FOR( i = 0; i < L_subfr; i++ ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_241x_ACELP_FAST temp6 = L_msu0( temp1, G, *alp_pos0 ); temp6 = L_msu0( temp6, G1, *alp_pos1 ); temp6 = L_msu0( temp6, G2, *alp_pos2 ); temp6 = L_msu0( temp6, G3, *alp_pos3 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); Loading @@ -832,6 +976,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); #endif dn[i] = L_shr( temp6, 6 ); move16(); alp_pos0++; Loading Loading @@ -885,6 +1030,13 @@ void acelp_fast_fx( FOR( j = 0; j < nb_pulse; j++ ) { #ifdef OPT_241x_ACELP_FAST p_hn = h_inv - m[j]; if( s[j] > 0 ) { p_hn = h - m[j]; } #else IF( s[j] > 0 ) { p_hn = h - m[j]; Loading @@ -893,7 +1045,7 @@ void acelp_fast_fx( { p_hn = h_inv - m[j]; } #endif FOR( i = 0; i < L_subfr; i++ ) { y_tmp[i] = add_sat( y_tmp[i], *p_hn++ ); // q_H Loading @@ -906,8 +1058,15 @@ void acelp_fast_fx( s64 = W_mult0_32_32( crit_num, crit_num ); // 2*Q_dn exp = W_norm( s64 ); crit_num = W_extract_h( W_shl( s64, exp ) ); // 2*Q_dn + exp - 32 #ifdef OPT_241x_ACELP_FAST_2 q_crit_num = sub( exp, 32 ); #else #ifndef OPT_241x_ACELP_FAST q_crit_num = add( shl( Q_dn, 1 ), sub( exp, 32 ) ); #else q_crit_num = add( shl( Q_dncn, 1 ), sub( exp, 32 ) ); #endif #endif // crit_den = sum2_fx( y_tmp, L_subfr ); // 2*q_H s64 = 0; move64(); Loading @@ -926,6 +1085,15 @@ void acelp_fast_fx( IF( GT_16( exp, exp1 ) ) { #ifdef OPT_241x_ACELP_FAST flag = 0; move16(); if ( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; move16(); } #else IF( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; Loading @@ -936,9 +1104,19 @@ void acelp_fast_fx( flag = 0; move16(); } #endif } ELSE { #ifdef OPT_241x_ACELP_FAST flag = 0; move16(); if( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; move16(); } #else IF( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; Loading @@ -949,6 +1127,7 @@ void acelp_fast_fx( flag = 0; move16(); } #endif } Loading
lib_enc/inov_enc_fx.c +8 −0 Original line number Diff line number Diff line Loading @@ -369,7 +369,11 @@ Word16 inov_encode_fx( } ELSE { #ifdef OPT_241x_ACELP_FAST acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, h2, code, y2, L_subfr ); #else acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, Qcn, h2, code, y2, L_subfr ); #endif } } ELSE IF( ( EQ_16( st_fx->idchan, 1 ) && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 7 ) ) || ( st_fx->idchan == 0 && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 3 ) ) ) Loading @@ -380,7 +384,11 @@ Word16 inov_encode_fx( } ELSE { #ifdef OPT_241x_ACELP_FAST acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, h2, code, y2, L_SUBFR ); #else acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, Qcn, h2, code, y2, L_SUBFR ); #endif } } ELSE Loading