Loading lib_com/ivas_prot_fx.h +6 −0 Original line number Diff line number Diff line Loading @@ -3031,9 +3031,15 @@ void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dn */ #ifdef OPT_2416_ACELP_FAST const Word16 Q_dncn, /* i : scaling factor of dn and cn */ #else Word16 Q_dn, #endif const Word16 cn[L_SUBFR], /* i : residual after long term prediction q_cn*/ #ifndef OPT_2416_ACELP_FAST const Word16 q_cn, #endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation */ Word16 y[], /* o : filtered fixed codebook excitation */ Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -95,6 +95,7 @@ #define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ #define FIX_2392_MSAN_DESTROY_DEC /* VA: basop issue 2392: fix MSAN in ivas_destroy_dec_fx() */ #define FIX_FLOAT_1522_LTV_MSAN_QMETADATA_ENC_EC3 /* Nokia: float issue 1522: fix uninit MSAN in EC3 of qmetadata encoding */ #define OPT_2416_ACELP_FAST /* VA: basop issue 2426, optimisation of acelp_fast_fx ( reduc. compl. by 0.35 wmops ) */ #define FIX_2410_HARM_MODIF_FS /* VA: basop issue 2410: Remove duplicated modif_Fs */ #define HARM_LEV_DURBIN /* VA: basop issue 2423: harmonize levinson-Durbin algorithm */ #define HARMONIZE_TBE2 /* VA: basop issue 2399: Remove duplicated code: TBE, step 2 */ Loading lib_enc/cod4t64_fast_fx.c +164 −27 Original line number Diff line number Diff line Loading @@ -42,11 +42,8 @@ * Local constants *-------------------------------------------------------------------*/ #define BETA_BN1 2.0f #define BETA_BN2 2.25f #define BETA_BN1_FX 2 // Q0 #define BETA_BN2_FX 9 // Q2 #define BETA_BN1_FX 2 // 2.0f in Q0 #define BETA_BN2_FX 9 // 2.25f in Q2 #define L_SUBFR_MAX 2 * L_SUBFR #define MAX_NUM_INTER 5 Loading Loading @@ -76,6 +73,8 @@ static Word16 quant_1p_N1_L_subfr_fx( return index; } static Word16 find_best_pulse_fx( const Word16 L_subfr, const Word16 nb_tracks, Loading Loading @@ -111,6 +110,7 @@ static Word16 find_best_pulse_fx( return m; } /*-------------------------------------------------------------------* * Function acelp_fast() * Loading @@ -121,18 +121,19 @@ static Word16 find_best_pulse_fx( void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. */ // Q_dn const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dncn */ #ifdef OPT_2416_ACELP_FAST const Word16 Q_dncn, /* i : scaling factor of dn and cn */ #else Word16 Q_dn, const Word16 cn[L_SUBFR], /* i : residual after long term prediction */ // q_cn #endif const Word16 cn[L_SUBFR], /* i : residual after long term prediction Q_dncn */ #ifndef OPT_2416_ACELP_FAST const Word16 q_cn, const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter */ // e(norm_s(H[0])+1) Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation */ // Q0 Word16 y[], /* o : filtered fixed codebook excitation */ // e(norm_s(H[0])+1) #endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation Q0 */ Word16 y[], /* o : filtered fixed codebook excitation e(norm_s(H[0])+1) */ const Word16 L_subfr /* i : subframe length */ ) { Loading Loading @@ -163,8 +164,11 @@ void acelp_fast_fx( Word16 flag = 0; move16(); Word32 temp1, temp2, temp3, temp4, temp5, temp6; #ifndef OPT_2416_ACELP_FAST Word16 q_temp1, q_temp2; #endif Word16 scale_temp1, scale_temp2; /*-----------------------------------------------------------------* * Initialization *-----------------------------------------------------------------*/ Loading Loading @@ -223,7 +227,6 @@ void acelp_fast_fx( codetrackpos = -1; /* to avoid compilation warnings */ move16(); IF( EQ_16( cdk_index, 14 ) ) { /* 14 bits, 2 pulses, 2 tracks: 11 (used all tracks) */ Loading Loading @@ -295,8 +298,9 @@ void acelp_fast_fx( * Find signal bn[] and sign pre-selection vector sign[]. *-----------------------------------------------------------------*/ #ifndef OPT_2416_ACELP_FAST exp = sub( Q31, shl( Q_dn, 1 ) ); #endif s64 = 0; move64(); FOR( i = 0; i < L_subfr; i++ ) Loading @@ -311,10 +315,13 @@ void acelp_fast_fx( { Word16 new_exp1 = W_norm( s64 ); dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_2416_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), new_exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); #endif } cncn_fx = 214748365 /* 0.1f in Q31 */; move32(); cncn_e = 0; Loading @@ -337,7 +344,11 @@ void acelp_fast_fx( { Word16 new_exp1 = W_norm( s64 ); cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_2416_ACELP_FAST cncn_track_e[q] = sub( 31, sub( add( add( shl( q_cn, 1 ), 1 ), new_exp1 ), 32 ) ); #else cncn_track_e[q] = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); #endif } cncn_fx = BASOP_Util_Add_Mant32Exp( cncn_fx, cncn_e, cncn_track[q], cncn_track_e[q], &cncn_e ); // Q(cncn_e) } Loading @@ -348,8 +359,14 @@ void acelp_fast_fx( tmp = add( tmp, sub( dndn_e, cncn_e ) ); s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // Q(15 - tmp) #ifdef OPT_2416_ACELP_FAST scale_temp1 = sub( Q16, tmp ); /* Q_dn = q_cn and it doesn't matter */ #else q_temp1 = add( add( sub( Q15, tmp ), q_cn ), Q1 ); scale_temp1 = sub( q_temp1, Q_dn ); #endif FOR( i = 0; i < L_subfr; i++ ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 Loading @@ -360,6 +377,15 @@ void acelp_fast_fx( bn_orig_fx[i] = L_add( temp1, temp2 ); // Q_dn move32(); #ifdef OPT_2416_ACELP_FAST sign_fx[i] = -1; move16(); if ( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; move16(); } #else IF( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; Loading @@ -369,6 +395,7 @@ void acelp_fast_fx( sign_fx[i] = -1; } move16(); #endif } /*-----------------------------------------------------------------* Loading Loading @@ -397,6 +424,7 @@ void acelp_fast_fx( /*-----------------------------------------------------------------* * Approximate FI[i][j] by alp[abs(i-j)] and compute buffer alp_buf[]. *-----------------------------------------------------------------*/ q_H = sub( 14, norm_s( H[0] ) ); shift = sub( shl( q_H, 1 ), 6 ); Loading Loading @@ -458,6 +486,29 @@ void acelp_fast_fx( test(); test(); /* skip certain tracks if number of pulses is lower than number of tracks */ #ifdef OPT_2416_ACELP_FAST /* Just need a negative number, it doesn't need to be scaled */ IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { max_track[NB_TRACK_FCB_4T - 3] = L_deposit_l( -1 ); move32(); max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); move32(); } FOR( q = 0; q < nb_tracks; q++ ) { i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); max_track[i] = L_deposit_l( -1 ); move32(); } #else IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dn ); // Q_dn Loading @@ -479,6 +530,7 @@ void acelp_fast_fx( max_track[i] = L_shl( -1, Q_dn ); // Q_dn move32(); } #endif track_order[4] = track_order[1]; // Q0 move16(); Loading Loading @@ -617,9 +669,13 @@ void acelp_fast_fx( move64(); FOR( i = track; i < L_subfr; i += nb_tracks ) { #ifdef OPT_2416_ACELP_FAST temp3 = L_msu0( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ); #else temp1 = L_mult0( Gd, dn_orig[i] ); temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_sub( temp1, temp2 ); #endif dn[i] = L_shr( temp3, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -627,8 +683,11 @@ void acelp_fast_fx( } exp1 = W_norm( s64 ); dndn_fx = W_extract_h( W_shl( s64, exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_2416_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), exp1 ), 32 ) ); #endif IF( dndn_fx == 0 ) { dndn_fx = 214748365 /* 0.1f in Q31 */; Loading @@ -645,10 +704,16 @@ void acelp_fast_fx( move16(); m[1] = track; // Q0 move16(); #ifdef OPT_2416_ACELP_FAST scale_temp1 = sub( Q16, exp1 ); scale_temp2 = Q2; move16(); #else q_temp1 = add( add( sub( Q15, exp1 ), q_cn ), 1 ); q_temp2 = add( Q_dn, Q2 ); scale_temp1 = sub( q_temp1, Q_dn ); scale_temp2 = sub( q_temp2, Q_dn ); #endif FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 Loading Loading @@ -683,7 +748,11 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[1], dn_orig[m[1]] ) ); // Q_dn Gd32 = Gd; move16(); #ifdef OPT_2416_ACELP_FAST Gd32 = L_add( Gd32, L_mac0( alp[0], i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ); // Q6 #else Gd32 = L_add( Gd32, L_add( alp[0], L_mult0( i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ) ); // Q6 #endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -697,10 +766,15 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_2416_ACELP_FAST temp4 = L_msu0( temp1, G, *alp_pos0 ); temp4 = L_msu0( temp4, G1, *alp_pos1 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_sub( temp1, temp2 ); temp4 = L_sub( temp4, temp3 ); #endif dn[i] = L_shr( temp4, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -720,9 +794,15 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[2], dn_orig[m[2]] ) ); // Q_dn temp1 = alp[0]; move32(); #ifdef OPT_2416_ACELP_FAST temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, temp3 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, L_add( L_add( temp1, temp2 ), temp3 ) ); // Q6 #endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -739,12 +819,18 @@ void acelp_fast_fx( { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_2416_ACELP_FAST temp5 = L_msu0( temp1, G, *alp_pos0 ); temp5 = L_msu0( temp5, G1, *alp_pos1 ); temp5 = L_msu0( temp5, G2, *alp_pos2 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); temp5 = L_sub( temp1, temp2 ); temp5 = L_sub( temp5, temp3 ); temp5 = L_sub( temp5, temp4 ); #endif dn[i] = L_shr( temp5, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -770,11 +856,19 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[3], dn_orig[m[3]] ) ); // Q_dn temp1 = alp[0]; move32(); #ifdef OPT_2416_ACELP_FAST temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp2 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp2 = L_mac0( temp2, i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, temp2 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp4 = L_mult0( i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, L_add( L_add( L_add( temp1, temp2 ), temp3 ), temp4 ) ); // Q6 #endif G = Gn; move16(); // Q_dn G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -795,6 +889,12 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_2416_ACELP_FAST temp6 = L_msu0( temp1, G, *alp_pos0 ); temp6 = L_msu0( temp6, G1, *alp_pos1 ); temp6 = L_msu0( temp6, G2, *alp_pos2 ); temp6 = L_msu0( temp6, G3, *alp_pos3 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); Loading @@ -803,6 +903,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); #endif dn[i] = L_shr( temp6, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -824,6 +925,12 @@ void acelp_fast_fx( FOR( i = 0; i < L_subfr; i++ ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_2416_ACELP_FAST temp6 = L_msu0( temp1, G, *alp_pos0 ); temp6 = L_msu0( temp6, G1, *alp_pos1 ); temp6 = L_msu0( temp6, G2, *alp_pos2 ); temp6 = L_msu0( temp6, G3, *alp_pos3 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); Loading @@ -832,6 +939,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); #endif dn[i] = L_shr( temp6, 6 ); move16(); alp_pos0++; Loading Loading @@ -885,6 +993,13 @@ void acelp_fast_fx( FOR( j = 0; j < nb_pulse; j++ ) { #ifdef OPT_2416_ACELP_FAST p_hn = h_inv - m[j]; if ( s[j] > 0 ) { p_hn = h - m[j]; } #else IF( s[j] > 0 ) { p_hn = h - m[j]; Loading @@ -893,7 +1008,7 @@ void acelp_fast_fx( { p_hn = h_inv - m[j]; } #endif FOR( i = 0; i < L_subfr; i++ ) { y_tmp[i] = add_sat( y_tmp[i], *p_hn++ ); // q_H Loading @@ -906,8 +1021,11 @@ void acelp_fast_fx( s64 = W_mult0_32_32( crit_num, crit_num ); // 2*Q_dn exp = W_norm( s64 ); crit_num = W_extract_h( W_shl( s64, exp ) ); // 2*Q_dn + exp - 32 #ifndef OPT_2416_ACELP_FAST q_crit_num = add( shl( Q_dn, 1 ), sub( exp, 32 ) ); #else q_crit_num = add( shl( Q_dncn, 1 ), sub( exp, 32 ) ); #endif // crit_den = sum2_fx( y_tmp, L_subfr ); // 2*q_H s64 = 0; move64(); Loading @@ -926,6 +1044,15 @@ void acelp_fast_fx( IF( GT_16( exp, exp1 ) ) { #ifdef OPT_2416_ACELP_FAST flag = 0; move16(); if ( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; move16(); } #else IF( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; Loading @@ -936,9 +1063,19 @@ void acelp_fast_fx( flag = 0; move16(); } #endif } ELSE { #ifdef OPT_2416_ACELP_FAST flag = 0; move16(); if ( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; move16(); } #else IF( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; Loading @@ -949,9 +1086,9 @@ void acelp_fast_fx( flag = 0; move16(); } #endif } IF( flag ) { crit_num_max = crit_num; Loading lib_enc/inov_enc_fx.c +8 −0 Original line number Diff line number Diff line Loading @@ -369,7 +369,11 @@ Word16 inov_encode_fx( } ELSE { #ifdef OPT_2416_ACELP_FAST acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, h2, code, y2, L_subfr ); #else acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, Qcn, h2, code, y2, L_subfr ); #endif } } ELSE IF( ( EQ_16( st_fx->idchan, 1 ) && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 7 ) ) || ( st_fx->idchan == 0 && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 3 ) ) ) Loading @@ -380,7 +384,11 @@ Word16 inov_encode_fx( } ELSE { #ifdef OPT_2416_ACELP_FAST acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, h2, code, y2, L_SUBFR ); #else acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, Qcn, h2, code, y2, L_SUBFR ); #endif } } ELSE Loading Loading
lib_com/ivas_prot_fx.h +6 −0 Original line number Diff line number Diff line Loading @@ -3031,9 +3031,15 @@ void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dn */ #ifdef OPT_2416_ACELP_FAST const Word16 Q_dncn, /* i : scaling factor of dn and cn */ #else Word16 Q_dn, #endif const Word16 cn[L_SUBFR], /* i : residual after long term prediction q_cn*/ #ifndef OPT_2416_ACELP_FAST const Word16 q_cn, #endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation */ Word16 y[], /* o : filtered fixed codebook excitation */ Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -95,6 +95,7 @@ #define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */ #define FIX_2392_MSAN_DESTROY_DEC /* VA: basop issue 2392: fix MSAN in ivas_destroy_dec_fx() */ #define FIX_FLOAT_1522_LTV_MSAN_QMETADATA_ENC_EC3 /* Nokia: float issue 1522: fix uninit MSAN in EC3 of qmetadata encoding */ #define OPT_2416_ACELP_FAST /* VA: basop issue 2426, optimisation of acelp_fast_fx ( reduc. compl. by 0.35 wmops ) */ #define FIX_2410_HARM_MODIF_FS /* VA: basop issue 2410: Remove duplicated modif_Fs */ #define HARM_LEV_DURBIN /* VA: basop issue 2423: harmonize levinson-Durbin algorithm */ #define HARMONIZE_TBE2 /* VA: basop issue 2399: Remove duplicated code: TBE, step 2 */ Loading
lib_enc/cod4t64_fast_fx.c +164 −27 Original line number Diff line number Diff line Loading @@ -42,11 +42,8 @@ * Local constants *-------------------------------------------------------------------*/ #define BETA_BN1 2.0f #define BETA_BN2 2.25f #define BETA_BN1_FX 2 // Q0 #define BETA_BN2_FX 9 // Q2 #define BETA_BN1_FX 2 // 2.0f in Q0 #define BETA_BN2_FX 9 // 2.25f in Q2 #define L_SUBFR_MAX 2 * L_SUBFR #define MAX_NUM_INTER 5 Loading Loading @@ -76,6 +73,8 @@ static Word16 quant_1p_N1_L_subfr_fx( return index; } static Word16 find_best_pulse_fx( const Word16 L_subfr, const Word16 nb_tracks, Loading Loading @@ -111,6 +110,7 @@ static Word16 find_best_pulse_fx( return m; } /*-------------------------------------------------------------------* * Function acelp_fast() * Loading @@ -121,18 +121,19 @@ static Word16 find_best_pulse_fx( void acelp_fast_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 cdk_index, /* i : codebook index */ const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. */ // Q_dn const Word16 dn_orig[L_SUBFR], /* i : corr. between target and h[]. Q_dncn */ #ifdef OPT_2416_ACELP_FAST const Word16 Q_dncn, /* i : scaling factor of dn and cn */ #else Word16 Q_dn, const Word16 cn[L_SUBFR], /* i : residual after long term prediction */ // q_cn #endif const Word16 cn[L_SUBFR], /* i : residual after long term prediction Q_dncn */ #ifndef OPT_2416_ACELP_FAST const Word16 q_cn, const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter */ // e(norm_s(H[0])+1) Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation */ // Q0 Word16 y[], /* o : filtered fixed codebook excitation */ // e(norm_s(H[0])+1) #endif const Word16 H[L_SUBFR], /* i : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */ Word16 code[L_SUBFR], /* o : algebraic (fixed) codebook excitation Q0 */ Word16 y[], /* o : filtered fixed codebook excitation e(norm_s(H[0])+1) */ const Word16 L_subfr /* i : subframe length */ ) { Loading Loading @@ -163,8 +164,11 @@ void acelp_fast_fx( Word16 flag = 0; move16(); Word32 temp1, temp2, temp3, temp4, temp5, temp6; #ifndef OPT_2416_ACELP_FAST Word16 q_temp1, q_temp2; #endif Word16 scale_temp1, scale_temp2; /*-----------------------------------------------------------------* * Initialization *-----------------------------------------------------------------*/ Loading Loading @@ -223,7 +227,6 @@ void acelp_fast_fx( codetrackpos = -1; /* to avoid compilation warnings */ move16(); IF( EQ_16( cdk_index, 14 ) ) { /* 14 bits, 2 pulses, 2 tracks: 11 (used all tracks) */ Loading Loading @@ -295,8 +298,9 @@ void acelp_fast_fx( * Find signal bn[] and sign pre-selection vector sign[]. *-----------------------------------------------------------------*/ #ifndef OPT_2416_ACELP_FAST exp = sub( Q31, shl( Q_dn, 1 ) ); #endif s64 = 0; move64(); FOR( i = 0; i < L_subfr; i++ ) Loading @@ -311,10 +315,13 @@ void acelp_fast_fx( { Word16 new_exp1 = W_norm( s64 ); dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_2416_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), new_exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); #endif } cncn_fx = 214748365 /* 0.1f in Q31 */; move32(); cncn_e = 0; Loading @@ -337,7 +344,11 @@ void acelp_fast_fx( { Word16 new_exp1 = W_norm( s64 ); cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_2416_ACELP_FAST cncn_track_e[q] = sub( 31, sub( add( add( shl( q_cn, 1 ), 1 ), new_exp1 ), 32 ) ); #else cncn_track_e[q] = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) ); #endif } cncn_fx = BASOP_Util_Add_Mant32Exp( cncn_fx, cncn_e, cncn_track[q], cncn_track_e[q], &cncn_e ); // Q(cncn_e) } Loading @@ -348,8 +359,14 @@ void acelp_fast_fx( tmp = add( tmp, sub( dndn_e, cncn_e ) ); s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // Q(15 - tmp) #ifdef OPT_2416_ACELP_FAST scale_temp1 = sub( Q16, tmp ); /* Q_dn = q_cn and it doesn't matter */ #else q_temp1 = add( add( sub( Q15, tmp ), q_cn ), Q1 ); scale_temp1 = sub( q_temp1, Q_dn ); #endif FOR( i = 0; i < L_subfr; i++ ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 Loading @@ -360,6 +377,15 @@ void acelp_fast_fx( bn_orig_fx[i] = L_add( temp1, temp2 ); // Q_dn move32(); #ifdef OPT_2416_ACELP_FAST sign_fx[i] = -1; move16(); if ( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; move16(); } #else IF( bn_orig_fx[i] >= 0 ) { sign_fx[i] = 1; Loading @@ -369,6 +395,7 @@ void acelp_fast_fx( sign_fx[i] = -1; } move16(); #endif } /*-----------------------------------------------------------------* Loading Loading @@ -397,6 +424,7 @@ void acelp_fast_fx( /*-----------------------------------------------------------------* * Approximate FI[i][j] by alp[abs(i-j)] and compute buffer alp_buf[]. *-----------------------------------------------------------------*/ q_H = sub( 14, norm_s( H[0] ) ); shift = sub( shl( q_H, 1 ), 6 ); Loading Loading @@ -458,6 +486,29 @@ void acelp_fast_fx( test(); test(); /* skip certain tracks if number of pulses is lower than number of tracks */ #ifdef OPT_2416_ACELP_FAST /* Just need a negative number, it doesn't need to be scaled */ IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { max_track[NB_TRACK_FCB_4T - 3] = L_deposit_l( -1 ); move32(); max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); move32(); } ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) ) { max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 ); move32(); } FOR( q = 0; q < nb_tracks; q++ ) { i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 ); track_order[q] = i; move16(); max_track[i] = L_deposit_l( -1 ); move32(); } #else IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) ) { max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dn ); // Q_dn Loading @@ -479,6 +530,7 @@ void acelp_fast_fx( max_track[i] = L_shl( -1, Q_dn ); // Q_dn move32(); } #endif track_order[4] = track_order[1]; // Q0 move16(); Loading Loading @@ -617,9 +669,13 @@ void acelp_fast_fx( move64(); FOR( i = track; i < L_subfr; i += nb_tracks ) { #ifdef OPT_2416_ACELP_FAST temp3 = L_msu0( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 ); #else temp1 = L_mult0( Gd, dn_orig[i] ); temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_sub( temp1, temp2 ); #endif dn[i] = L_shr( temp3, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -627,8 +683,11 @@ void acelp_fast_fx( } exp1 = W_norm( s64 ); dndn_fx = W_extract_h( W_shl( s64, exp1 ) ); // 2 * Q_dyn + exp1 - 31 #ifndef OPT_2416_ACELP_FAST dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), exp1 ), 32 ) ); #else dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), exp1 ), 32 ) ); #endif IF( dndn_fx == 0 ) { dndn_fx = 214748365 /* 0.1f in Q31 */; Loading @@ -645,10 +704,16 @@ void acelp_fast_fx( move16(); m[1] = track; // Q0 move16(); #ifdef OPT_2416_ACELP_FAST scale_temp1 = sub( Q16, exp1 ); scale_temp2 = Q2; move16(); #else q_temp1 = add( add( sub( Q15, exp1 ), q_cn ), 1 ); q_temp2 = add( Q_dn, Q2 ); scale_temp1 = sub( q_temp1, Q_dn ); scale_temp2 = sub( q_temp2, Q_dn ); #endif FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = L_mult( s_coef_fx, cn[i] ); // Q(15 - tmp)+q_cn+1 Loading Loading @@ -683,7 +748,11 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[1], dn_orig[m[1]] ) ); // Q_dn Gd32 = Gd; move16(); #ifdef OPT_2416_ACELP_FAST Gd32 = L_add( Gd32, L_mac0( alp[0], i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ); // Q6 #else Gd32 = L_add( Gd32, L_add( alp[0], L_mult0( i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ) ); // Q6 #endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -697,10 +766,15 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_2416_ACELP_FAST temp4 = L_msu0( temp1, G, *alp_pos0 ); temp4 = L_msu0( temp4, G1, *alp_pos1 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_sub( temp1, temp2 ); temp4 = L_sub( temp4, temp3 ); #endif dn[i] = L_shr( temp4, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -720,9 +794,15 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[2], dn_orig[m[2]] ) ); // Q_dn temp1 = alp[0]; move32(); #ifdef OPT_2416_ACELP_FAST temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, temp3 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] ); Gd32 = L_add( Gd32, L_add( L_add( temp1, temp2 ), temp3 ) ); // Q6 #endif G = Gn; // Q_dn move16(); G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -739,12 +819,18 @@ void acelp_fast_fx( { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_2416_ACELP_FAST temp5 = L_msu0( temp1, G, *alp_pos0 ); temp5 = L_msu0( temp5, G1, *alp_pos1 ); temp5 = L_msu0( temp5, G2, *alp_pos2 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); temp5 = L_sub( temp1, temp2 ); temp5 = L_sub( temp5, temp3 ); temp5 = L_sub( temp5, temp4 ); #endif dn[i] = L_shr( temp5, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -770,11 +856,19 @@ void acelp_fast_fx( Gn = add( Gn, i_mult( s[3], dn_orig[m[3]] ) ); // Q_dn temp1 = alp[0]; move32(); #ifdef OPT_2416_ACELP_FAST temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp2 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp2 = L_mac0( temp2, i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, temp2 ); // Q6 #else temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] ); temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] ); temp4 = L_mult0( i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] ); Gd32 = L_add( Gd32, L_add( L_add( L_add( temp1, temp2 ), temp3 ), temp4 ) ); // Q6 #endif G = Gn; move16(); // Q_dn G1 = i_mult( G, s[1] ); // Q_dn Loading @@ -795,6 +889,12 @@ void acelp_fast_fx( FOR( i = track; i < L_subfr; i += nb_tracks ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_2416_ACELP_FAST temp6 = L_msu0( temp1, G, *alp_pos0 ); temp6 = L_msu0( temp6, G1, *alp_pos1 ); temp6 = L_msu0( temp6, G2, *alp_pos2 ); temp6 = L_msu0( temp6, G3, *alp_pos3 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); Loading @@ -803,6 +903,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); #endif dn[i] = L_shr( temp6, 6 ); move32(); alp_pos0 += nb_tracks; Loading @@ -824,6 +925,12 @@ void acelp_fast_fx( FOR( i = 0; i < L_subfr; i++ ) { temp1 = imult3216( Gd32, dn_orig[i] ); #ifdef OPT_2416_ACELP_FAST temp6 = L_msu0( temp1, G, *alp_pos0 ); temp6 = L_msu0( temp6, G1, *alp_pos1 ); temp6 = L_msu0( temp6, G2, *alp_pos2 ); temp6 = L_msu0( temp6, G3, *alp_pos3 ); #else temp2 = L_mult0( G, *alp_pos0 ); temp3 = L_mult0( G1, *alp_pos1 ); temp4 = L_mult0( G2, *alp_pos2 ); Loading @@ -832,6 +939,7 @@ void acelp_fast_fx( temp6 = L_sub( temp6, temp3 ); temp6 = L_sub( temp6, temp4 ); temp6 = L_sub( temp6, temp5 ); #endif dn[i] = L_shr( temp6, 6 ); move16(); alp_pos0++; Loading Loading @@ -885,6 +993,13 @@ void acelp_fast_fx( FOR( j = 0; j < nb_pulse; j++ ) { #ifdef OPT_2416_ACELP_FAST p_hn = h_inv - m[j]; if ( s[j] > 0 ) { p_hn = h - m[j]; } #else IF( s[j] > 0 ) { p_hn = h - m[j]; Loading @@ -893,7 +1008,7 @@ void acelp_fast_fx( { p_hn = h_inv - m[j]; } #endif FOR( i = 0; i < L_subfr; i++ ) { y_tmp[i] = add_sat( y_tmp[i], *p_hn++ ); // q_H Loading @@ -906,8 +1021,11 @@ void acelp_fast_fx( s64 = W_mult0_32_32( crit_num, crit_num ); // 2*Q_dn exp = W_norm( s64 ); crit_num = W_extract_h( W_shl( s64, exp ) ); // 2*Q_dn + exp - 32 #ifndef OPT_2416_ACELP_FAST q_crit_num = add( shl( Q_dn, 1 ), sub( exp, 32 ) ); #else q_crit_num = add( shl( Q_dncn, 1 ), sub( exp, 32 ) ); #endif // crit_den = sum2_fx( y_tmp, L_subfr ); // 2*q_H s64 = 0; move64(); Loading @@ -926,6 +1044,15 @@ void acelp_fast_fx( IF( GT_16( exp, exp1 ) ) { #ifdef OPT_2416_ACELP_FAST flag = 0; move16(); if ( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; move16(); } #else IF( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) ) { flag = 1; Loading @@ -936,9 +1063,19 @@ void acelp_fast_fx( flag = 0; move16(); } #endif } ELSE { #ifdef OPT_2416_ACELP_FAST flag = 0; move16(); if ( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; move16(); } #else IF( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) ) { flag = 1; Loading @@ -949,9 +1086,9 @@ void acelp_fast_fx( flag = 0; move16(); } #endif } IF( flag ) { crit_num_max = crit_num; Loading
lib_enc/inov_enc_fx.c +8 −0 Original line number Diff line number Diff line Loading @@ -369,7 +369,11 @@ Word16 inov_encode_fx( } ELSE { #ifdef OPT_2416_ACELP_FAST acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, h2, code, y2, L_subfr ); #else acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, Qcn, h2, code, y2, L_subfr ); #endif } } ELSE IF( ( EQ_16( st_fx->idchan, 1 ) && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 7 ) ) || ( st_fx->idchan == 0 && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 3 ) ) ) Loading @@ -380,7 +384,11 @@ Word16 inov_encode_fx( } ELSE { #ifdef OPT_2416_ACELP_FAST acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, h2, code, y2, L_SUBFR ); #else acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, Qcn, h2, code, y2, L_SUBFR ); #endif } } ELSE Loading