From e691f83e33322436ae455c3f00f0aaecb6029122 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Mon, 23 Dec 2024 14:49:03 +0530 Subject: [PATCH] MLD improvements: ACELP to match the IVAS float code, norm_corr_fx saturation removal --- lib_enc/acelp_core_enc.c | 2 + lib_enc/enc_gen_voic_fx.c | 6 +- lib_enc/enc_higher_acelp_fx.c | 4 +- lib_enc/enc_pit_exc_fx.c | 6 +- lib_enc/enc_tran_fx.c | 2 +- lib_enc/pit_enc_fx.c | 305 +++++++++++++++++++++++++++++++++- lib_enc/prot_fx_enc.h | 24 ++- lib_enc/transition_enc_fx.c | 24 ++- 8 files changed, 353 insertions(+), 20 deletions(-) diff --git a/lib_enc/acelp_core_enc.c b/lib_enc/acelp_core_enc.c index d3d542f32..241930867 100644 --- a/lib_enc/acelp_core_enc.c +++ b/lib_enc/acelp_core_enc.c @@ -550,6 +550,8 @@ ivas_error acelp_core_enc( { st->mem_deemp_preQ_fx = 0; move16(); + st->mem_preemp_preQ_fx = 0; + move16(); st->last_code_preq = 0; move16(); st->last_nq_preQ = 0; diff --git a/lib_enc/enc_gen_voic_fx.c b/lib_enc/enc_gen_voic_fx.c index 856a4312f..32d3c7a1a 100644 --- a/lib_enc/enc_gen_voic_fx.c +++ b/lib_enc/enc_gen_voic_fx.c @@ -603,8 +603,8 @@ void encod_gen_voic_ivas_fx( * LP filtering of the adaptive excitation, codebook target computation *-----------------------------------------------------------------*/ - lp_select = lp_filt_exc_enc_fx( MODE1, st_fx->coder_type, i_subfr_fx, exc_fx, h1_fx, - xn_fx, y1_fx, xn2_fx, L_SUBFR, L_frame, g_corr_fx, clip_gain_fx, &gain_pit_fx, &lp_flag ); + lp_select = lp_filt_exc_enc_ivas_fx( MODE1, st_fx->coder_type, i_subfr_fx, exc_fx, h1_fx, + xn_fx, y1_fx, xn2_fx, L_SUBFR, L_frame, g_corr_fx, clip_gain_fx, &gain_pit_fx, &lp_flag ); IF( EQ_16( lp_flag, NORMAL_OPERATION ) ) { @@ -675,7 +675,7 @@ void encod_gen_voic_ivas_fx( gcode16 = round_fx( Lgcode ); #endif - hLPDmem->tilt_code = Est_tilt2( &exc_fx[i_subfr_fx], gain_pit_fx, code_fx, Lgcode, &voice_fac_fx, shift ); + hLPDmem->tilt_code = est_tilt_ivas_fx( &exc_fx[i_subfr_fx], gain_pit_fx, code_fx, gain_code_fx, &voice_fac_fx, 
Q_new, L_SUBFR, 0 ); /*-----------------------------------------------------------------* * Transform domain contribution encoding - inactive frames diff --git a/lib_enc/enc_higher_acelp_fx.c b/lib_enc/enc_higher_acelp_fx.c index da58fe721..13c6afb40 100644 --- a/lib_enc/enc_higher_acelp_fx.c +++ b/lib_enc/enc_higher_acelp_fx.c @@ -772,13 +772,13 @@ void transf_cdbk_enc_ivas_fx( IF( GT_16( st_fx->element_mode, EVS_MONO ) && NE_16( st_fx->coder_type, INACTIVE ) && GE_32( st_fx->core_brate, MIN_BRATE_AVQ_EXC ) && LE_32( st_fx->core_brate, MAX_BRATE_AVQ_EXC_TD ) && !harm_flag_acelp && code_preQ[0] != 0 ) { // if ( (float) abs( st->last_code_preq ) > 16.0f * (float) fabs( code_preQ[0] ) ) - if ( GT_16( abs_s( st_fx->last_code_preq ), shl_sat( abs_s( code_preQ[0] ), 4 ) ) ) + IF( GT_16( abs_s( st_fx->last_code_preq ), shl_sat( abs_s( code_preQ[0] ), 4 ) ) ) { st_fx->mem_preemp_preQ_fx = shr( st_fx->mem_preemp_preQ_fx, 4 ); move16(); } // else if ( (float) abs( st->last_code_preq ) > 8.0f * (float) fabs( code_preQ[0] ) ) - if ( GT_16( abs_s( st_fx->last_code_preq ), shl_sat( abs_s( code_preQ[0] ), 3 ) ) ) + ELSE IF( GT_16( abs_s( st_fx->last_code_preq ), shl_sat( abs_s( code_preQ[0] ), 3 ) ) ) { st_fx->mem_preemp_preQ_fx = shr( st_fx->mem_preemp_preQ_fx, 3 ); move16(); diff --git a/lib_enc/enc_pit_exc_fx.c b/lib_enc/enc_pit_exc_fx.c index 08f6a7a3b..78fe82979 100644 --- a/lib_enc/enc_pit_exc_fx.c +++ b/lib_enc/enc_pit_exc_fx.c @@ -798,8 +798,8 @@ void enc_pit_exc_ivas_fx( * (No LP filtering of the adaptive excitation) *-----------------------------------------------------------------*/ - lp_select = lp_filt_exc_enc_fx( MODE1, AUDIO, i_subfr, exc, h1, - xn, y1, xn2, L_subfr, st_fx->L_frame, g_corr, clip_gain, &gain_pit, &lp_flag ); + lp_select = lp_filt_exc_enc_ivas_fx( MODE1, AUDIO, i_subfr, exc, h1, + xn, y1, xn2, L_subfr, st_fx->L_frame, g_corr, clip_gain, &gain_pit, &lp_flag ); IF( EQ_16( lp_flag, NORMAL_OPERATION ) ) { @@ -877,7 +877,7 @@ void enc_pit_exc_ivas_fx( 
#endif IF( use_fcb != 0 ) { - hLPDmem->tilt_code = Est_tilt2( &exc[i_subfr], gain_pit, code, Lgcode, &voice_fac, shift ); + hLPDmem->tilt_code = est_tilt_ivas_fx( &exc[i_subfr], gain_pit, code, gain_code, &voice_fac, Q_new, L_SUBFR, 0 ); move16(); } ELSE diff --git a/lib_enc/enc_tran_fx.c b/lib_enc/enc_tran_fx.c index c7f7bcd51..918762a14 100644 --- a/lib_enc/enc_tran_fx.c +++ b/lib_enc/enc_tran_fx.c @@ -638,7 +638,7 @@ Word16 encod_tran_ivas_fx( Lgcode = L_shl( gain_code, Q_new ); /* scaled gain_code with Qnew -> Q16*/ gcode16 = round_fx( Lgcode ); #endif - hLPDmem->tilt_code = Est_tilt2( &exc_fx[i_subfr], gain_pit, code, Lgcode, &voice_fac, shift ); + hLPDmem->tilt_code = est_tilt_ivas_fx( &exc_fx[i_subfr], gain_pit, code, gain_code, &voice_fac, Q_new, L_SUBFR, 0 ); /*-----------------------------------------------------------------* * Update memory of the weighting filter *-----------------------------------------------------------------*/ diff --git a/lib_enc/pit_enc_fx.c b/lib_enc/pit_enc_fx.c index ea09805be..f0c37afe5 100644 --- a/lib_enc/pit_enc_fx.c +++ b/lib_enc/pit_enc_fx.c @@ -781,7 +781,7 @@ Word16 pit_encode_ivas_fx( /* o : Fractional } ELSE IF( EQ_16( nBits, 10 ) ) { - *T0 = pitch_fr4_fx( &exc[i_subfr], xn, h1, *T0_min, *T0_max, T0_frac, pit_flag, *limit_flag, PIT_MAX, PIT_MAX, L_FRAME, L_SUBFR ); + *T0 = pitch_fr4_ivas_fx( &exc[i_subfr], xn, h1, *T0_min, *T0_max, T0_frac, pit_flag, *limit_flag, PIT_MAX, PIT_MAX, L_FRAME, L_SUBFR ); } pit_Q_enc_ivas_fx( hBstr, 0, nBits, delta, pit_flag, *limit_flag, *T0, *T0_frac, T0_min, T0_max ); @@ -1129,7 +1129,196 @@ Word16 delta_pit_enc_fx( /* o : pitch index * * Find the closed loop pitch period with 1/4 subsample resolution. 
*-------------------------------------------------------------------*/ +Word16 pitch_fr4_ivas_fx( /* o : chosen integer pitch lag */ + const Word16 exc[], /* i : excitation buffer Q_new*/ + const Word16 xn[], /* i : target signal Q_new-1+shift*/ + const Word16 h[], /* i : weighted synthesis filter impulse response Q(14+shift)*/ + const Word16 t0_min, /* i : minimum value in the searched range. Q0*/ + const Word16 t0_max, /* i : maximum value in the searched range. Q0*/ + Word16 *pit_frac, /* o : chosen fraction (0, 1, 2 or 3) */ + const Word16 i_subfr, /* i : flag to first subframe */ + const Word16 limit_flag, /* i : flag for limits (0=restrained, 1=extended) */ + const Word16 t0_fr2, /* i : minimum value for resolution 1/2 */ + const Word16 t0_fr1, /* i : minimum value for resolution 1 */ + const Word16 L_frame, /* i : length of the frame */ + const Word16 L_subfr /* i : size of subframe */ +) +{ + Word16 i; + Word16 t_min, t_max; + Word16 max_val, t0, t1, fraction, step, temp; + Word16 *corr; + Word16 corr_v[15 + 2 * L_INTERPOL1 + 1]; /* Total length = t0_max-t0_min+1+2*L_inter */ + Word16 pit_min; + Word16 cor_max; + + /* initialization */ + IF( limit_flag == 0 ) + { + IF( EQ_16( L_frame, L_FRAME ) ) + { + pit_min = PIT_MIN; + move16(); + } + ELSE /* L_frame == L_FRAME16k */ + { + pit_min = PIT16k_MIN; + move16(); + } + } + ELSE + { + IF( EQ_16( L_frame, L_FRAME ) ) + { + pit_min = PIT_MIN_EXTEND; + move16(); + IF( EQ_16( limit_flag, 2 ) ) + { + pit_min = PIT_MIN_DOUBLEEXTEND; + move16(); + } + } + ELSE /* L_frame == L_FRAME16k */ + { + pit_min = PIT16k_MIN_EXTEND; + move16(); + } + } + + /*-----------------------------------------------------------------* + * - Find interval to compute normalized correlation + * - allocate memory to normalized correlation vector + * - Compute normalized correlation between target and filtered + * excitation + *-----------------------------------------------------------------*/ + + t_min = sub( t0_min, L_INTERPOL1 ); + t_max = 
add( t0_max, L_INTERPOL1 ); + corr = &corr_v[-t_min]; + move16(); + move16(); /* corr[t_min..t_max] */ + + norm_corr_ivas_fx( exc, xn, h, t_min, t_max, corr, L_subfr ); + + /*-----------------------------------------------------------------* + * Find integer pitch + *-----------------------------------------------------------------*/ + + max_val = corr[t0_min]; + move16(); + t0 = t0_min; + move16(); + + FOR( i = add( t0_min, 1 ); i <= t0_max; i++ ) + { + if ( corr[i] >= max_val ) + { + t0 = i; + move16(); + } + max_val = s_max( corr[i], max_val ); + } + + IF( EQ_16( t0_fr1, pit_min ) ) + { + /* don't search fraction (for 7b/4b quant) */ + test(); + IF( ( i_subfr == 0 ) && ( GE_16( t0, t0_fr2 ) ) ) + { + i = shl( shr( t0, 1 ), 1 ); /* 2 samples resolution */ + if ( GT_16( add( i, 2 ), PIT_MAX ) ) + { + i = sub( i, 2 ); + } + IF( GT_16( corr[i], corr[i + 2] ) ) + { + t0 = i; + move16(); + } + ELSE + { + t0 = add( i, 2 ); + } + } + *pit_frac = 0; + move16(); + + return ( t0 ); + } + + test(); + IF( ( i_subfr == 0 ) && ( GE_16( t0, t0_fr1 ) ) ) + { + *pit_frac = 0; + move16(); + + return ( t0 ); + } + + /*------------------------------------------------------------------* + * Search fractional pitch with 1/4 subsample resolution. + * search the fractions around t0 and choose the one which maximizes + * the interpolated normalized correlation. 
+ *-----------------------------------------------------------------*/ + + t1 = t0; + move16(); + step = 1; + move16(); /* 1/4 subsample resolution */ + fraction = 1; + move16(); + test(); + test(); + IF( ( ( i_subfr == 0 ) && ( GE_16( t0, t0_fr2 ) ) ) || ( EQ_16( t0_fr2, pit_min ) ) ) + { + step = 2; + move16(); /* 1/2 subsample resolution */ + fraction = 2; + move16(); + } + + IF( EQ_16( t0, t0_min ) ) /* Limit case */ + { + fraction = 0; + move16(); + cor_max = Interpol_4( &corr[t0], fraction ); + } + ELSE + { + t0 = sub( t0, 1 ); + cor_max = Interpol_4( &corr[t0], fraction ); + FOR( i = add( fraction, step ); i <= 3; i = (Word16) ( i + step ) ) + { + temp = Interpol_4( &corr[t0], i ); + IF( GT_16( temp, cor_max ) ) + { + cor_max = temp; + move16(); + fraction = i; + move16(); + } + } + } + + FOR( i = 0; i <= 3; i = (Word16) ( i + step ) ) + { + temp = Interpol_4( &corr[t1], i ); + IF( GT_16( temp, cor_max ) ) + { + cor_max = temp; + move16(); + fraction = i; + move16(); + t0 = t1; + move16(); + } + } + + *pit_frac = fraction; + move16(); + return ( t0 ); +} Word16 pitch_fr4_fx( /* o : chosen integer pitch lag */ const Word16 exc[], /* i : excitation buffer Q_new*/ const Word16 xn[], /* i : target signal Q_new-1+shift*/ @@ -1329,6 +1518,120 @@ Word16 pitch_fr4_fx( /* o : chosen integer pitch lag * excitation divided by the square root of energy of filtered * excitation) *---------------------------------------------------------------------*/ +void norm_corr_ivas_fx( + const Word16 exc[], /* i : excitation buffer Q_new*/ + const Word16 xn[], /* i : target signal Q_new-1+shift*/ + const Word16 h[], /* i : weighted synthesis filter impulse response Q(14+shift)*/ + const Word16 t_min, /* i : minimum value of searched range */ + const Word16 t_max, /* i : maximum value of searched range */ + Word16 ncorr[], /* o : normalized correlation Q15 */ + const Word16 L_subfr /* i : subframe size */ +) +{ + Word16 i, k, t; + Word16 corr, exp_corr, norm, exp_norm, exp, scale; + 
Word16 excf[L_FRAME16k]; + Word32 L_tmp; + Word64 W_tmp; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; +#endif + + k = negate( t_min ); + + /*-----------------------------------------------------------------* + * compute the filtered excitation for the first delay t_min + *-----------------------------------------------------------------*/ + + conv_fx( &exc[k], h, excf, L_subfr ); + + /* Compute rounded down 1/sqrt(energy of xn[]) */ +#ifdef BASOP_NOGLOB + L_tmp = L_mac_o( 1, xn[0], xn[0], &Overflow ); +#else + L_tmp = L_mac( 1, xn[0], xn[0] ); +#endif + FOR( i = 1; i < L_subfr; i++ ) + { +#ifdef BASOP_NOGLOB + L_tmp = L_mac_o( L_tmp, xn[i], xn[i], &Overflow ); +#else /* BASOP_NOGLOB */ + L_tmp = L_mac( L_tmp, xn[i], xn[i] ); +#endif + } + exp = norm_l( L_tmp ); + exp = sub( 30, exp ); + + exp = add( exp, 2 ); /* energy of xn[] x 2 + rounded up */ + scale = negate( shr( exp, 1 ) ); /* (1< 0; i-- ) + { + /* saturation can occur in add() */ + /*excf[i] = add(mult(exc[k], h[i]), excf[i - 1]); move16(); */ +#ifdef BASOP_NOGLOB + excf[i] = round_fx_sat( L_mac_sat( L_mult( excf[i - 1], 32767 ), exc[k], h[i] ) ); +#else + excf[i] = round_fx( L_mac( L_mult( excf[i - 1], 32767 ), exc[k], h[i] ) ); +#endif + } + excf[0] = mult_r( exc[k], h[0] ); + move16(); + } + } + + return; +} void norm_corr_fx( const Word16 exc[], /* i : excitation buffer Q_new*/ diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h index dc4b85c7a..1fabebb7b 100644 --- a/lib_enc/prot_fx_enc.h +++ b/lib_enc/prot_fx_enc.h @@ -933,7 +933,15 @@ void norm_corr_fx( Word16 ncorr[], /* o : normalized correlation Q15 */ const Word16 L_subfr /* i : subframe size */ ); - +void norm_corr_ivas_fx( + const Word16 exc[], /* i : excitation buffer Q_new*/ + const Word16 xn[], /* i : target signal Q_new-1+shift*/ + const Word16 h[], /* i : weighted synthesis filter impulse response Q(14+shift)*/ + const Word16 t_min, /* i : minimum value of searched range */ + const Word16 t_max, /* i : maximum value of 
searched range */ + Word16 ncorr[], /* o : normalized correlation Q15 */ + const Word16 L_subfr /* i : subframe size */ +); Word16 peak_avrg_ratio_fx( const Word32 total_brate, const Word32 *input_hi_fx, /* i : i signal */ @@ -995,6 +1003,20 @@ Word16 pitch_fr4_fx( /* o : chosen integer pitch lag const Word16 L_frame, /* i : length of the frame */ const Word16 L_subfr /* i : size of subframe */ ); +Word16 pitch_fr4_ivas_fx( /* o : chosen integer pitch lag */ + const Word16 exc[], /* i : excitation buffer Q_new*/ + const Word16 xn[], /* i : target signal Q_new-1+shift*/ + const Word16 h[], /* i : weighted synthesis filter impulse response Q(14+shift)*/ + const Word16 t0_min, /* i : minimum value in the searched range. Q0*/ + const Word16 t0_max, /* i : maximum value in the searched range. Q0*/ + Word16 *pit_frac, /* o : chosen fraction (0, 1, 2 or 3) */ + const Word16 i_subfr, /* i : flag to first subframe */ + const Word16 limit_flag, /* i : flag for limits (0=restrained, 1=extended) */ + const Word16 t0_fr2, /* i : minimum value for resolution 1/2 */ + const Word16 t0_fr1, /* i : minimum value for resolution 1 */ + const Word16 L_frame, /* i : length of the frame */ + const Word16 L_subfr /* i : size of subframe */ +); void pit_Q_enc_fx( BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */ const Word16 Opt_AMR_WB, /* i : flag indicating AMR-WB IO mode */ diff --git a/lib_enc/transition_enc_fx.c b/lib_enc/transition_enc_fx.c index 513ea4098..b4f9b2681 100644 --- a/lib_enc/transition_enc_fx.c +++ b/lib_enc/transition_enc_fx.c @@ -954,13 +954,15 @@ void transition_enc_ivas_fx( move16(); *clip_gain = 0; move16(); - g_corr_fx[0] = 16384; + g_corr_fx[0] = MAX16B /* 1.0f in Q15 */; move16(); - g_corr_fx[1] = add( shl( sub( shift_wsp, 1 ), 1 ), 1 ); move16(); - g_corr_fx[2] = -16384; + g_corr_fx[1] = 0; + move16(); + g_corr_fx[2] = MAX16B /* 1.0f in Q15 */; + move16(); + g_corr_fx[3] = 0; move16(); - g_corr_fx[3] = shl( sub( shift_wsp, 1 ), 1 ); set16_fx( 
&exc_fx[i_subfr], 0, L_SUBFR ); /* set excitation for current subrame to 0 */ @@ -1380,8 +1382,8 @@ void transition_enc_ivas_fx( { *clip_gain = gp_clip_fx( st_fx->element_mode, st_fx->core_brate, st_fx->voicing_fx, i_subfr, TRANSITION, xn_fx, gp_cl_fx, ( Q_new + shift - 1 ) ); - lp_select = lp_filt_exc_enc_fx( MODE1, TRANSITION, i_subfr, exc_fx, h1_fx, - xn_fx, y1_fx, xn2_fx, L_SUBFR, st_fx->L_frame, g_corr_fx, *clip_gain, gain_pit_fx, &lp_flag ); + lp_select = lp_filt_exc_enc_ivas_fx( MODE1, TRANSITION, i_subfr, exc_fx, h1_fx, + xn_fx, y1_fx, xn2_fx, L_SUBFR, st_fx->L_frame, g_corr_fx, *clip_gain, gain_pit_fx, &lp_flag ); IF( EQ_16( lp_flag, NORMAL_OPERATION ) ) { @@ -1570,10 +1572,14 @@ void transition_enc_ivas_fx( move16(); *clip_gain = 0; move16(); - g_corr_fx[0] = 0; + g_corr_fx[0] = 328 /* 0.01f in Q15 */; move16(); g_corr_fx[1] = 0; move16(); + g_corr_fx[2] = 328 /* 0.01f in Q15 */; + move16(); + g_corr_fx[3] = 0; + move16(); *Jopt_flag = 0; move16(); @@ -1606,8 +1612,8 @@ void transition_enc_ivas_fx( *clip_gain = gp_clip_fx( st_fx->element_mode, st_fx->core_brate, st_fx->voicing_fx, i_subfr, TRANSITION, xn_fx, gp_cl_fx, Q_new ); - lp_select = lp_filt_exc_enc_fx( MODE1, TRANSITION, i_subfr, exc_fx, h1_fx, - xn_fx, y1_fx, xn2_fx, L_SUBFR, st_fx->L_frame, g_corr_fx, *clip_gain, gain_pit_fx, &lp_flag ); + lp_select = lp_filt_exc_enc_ivas_fx( MODE1, TRANSITION, i_subfr, exc_fx, h1_fx, + xn_fx, y1_fx, xn2_fx, L_SUBFR, st_fx->L_frame, g_corr_fx, *clip_gain, gain_pit_fx, &lp_flag ); IF( EQ_16( lp_flag, NORMAL_OPERATION ) ) { -- GitLab