From d762be63cbf7dd0192c755d44f72c488f31471a1 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 17 Jun 2025 14:04:08 +0530 Subject: [PATCH 1/2] Bit exact optimization changes for sba enc path ivas_calc_p_coeffs_per_band_enc_fx, Interpol_lc_fx, RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx, RCcontextMapping_encode2_estimate_bandWise_fx functions basop optimizations Gain of ~4 WMOPS. --- lib_com/interpol_fx.c | 5 + lib_com/ivas_spar_com_fx.c | 88 +++++++++++- lib_com/options.h | 1 + lib_enc/ACcontextMapping_enc_fx.c | 213 +++++++++++++++++++++++------- 4 files changed, 261 insertions(+), 46 deletions(-) diff --git a/lib_com/interpol_fx.c b/lib_com/interpol_fx.c index a490a7552..030d41735 100644 --- a/lib_com/interpol_fx.c +++ b/lib_com/interpol_fx.c @@ -70,9 +70,14 @@ Word32 Interpol_lc_fx( /* o : interpolated value c2 += up_samp; /* move16() not needed, since the coefficient can be rearrange in bit exact way */ c1 += up_samp; } +#ifdef OPT_SBA_ENC_V2_BE + L_sum = W_shl_sat_l( L_sum64, 1 ); /*Q15*/ + } +#else L_sum = W_sat_l( L_sum64 ); /*Q14*/ } L_sum = L_shl_sat( L_sum, 1 ); /*Q15*/ +#endif return L_sum; } diff --git a/lib_com/ivas_spar_com_fx.c b/lib_com/ivas_spar_com_fx.c index ef549a2ae..15a529149 100644 --- a/lib_com/ivas_spar_com_fx.c +++ b/lib_com/ivas_spar_com_fx.c @@ -2361,18 +2361,30 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( Word32 re1, re2; W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][0], cov_dd_re[0][0] ); /*q_cov_dd_re+ pSparMd->band_coeffs[b_ts_idx].q_C_re_fx*/ +#ifdef OPT_SBA_ENC_V2_BE + q_tmp1 = sub( W_norm( W_tmp ), 32 ); + re1 = W_shl_sat_l( W_tmp, q_tmp1 ); /*q_cov_dd_re+ q_C_re+q_tmp1*/ + q_tmp1 = add( add( q_C_re, q_tmp1 ), q_cov_dd_re ); +#else q_tmp1 = W_norm( W_tmp ); re1 = W_extract_h( W_shl( W_tmp, q_tmp1 ) ); /*q_cov_dd_re+ q_C_re+q_tmp1-32*/ q_tmp1 = sub( add( add( q_C_re, q_tmp1 ), q_cov_dd_re ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp1 = 31; move16(); } W_tmp = W_mult0_32_32( 
pSparMd->band_coeffs[b_ts_idx].C_re_fx[1][0], cov_dd_re[0][0] ); /*q_cov_dd_re+ q_C_re*/ +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 32 ); + re2 = W_shl_sat_l( W_tmp, q_tmp ); /*q_cov_dd_re+ q_C_re+q_tmp*/ + q_tmp = add( add( q_C_re, q_tmp ), q_cov_dd_re ); +#else q_tmp = W_norm( W_tmp ); re2 = W_extract_h( W_shl( W_tmp, q_tmp ) ); /*q_cov_dd_re+ q_C_re+q_tmp-32*/ q_tmp = sub( add( add( q_C_re, q_tmp ), q_cov_dd_re ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2380,12 +2392,20 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][0], re1 ); // q_tmp1+q_C_re +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( W_tmp ), 32 ); + recon_uu_re[0][0] = W_shl_sat_l( W_tmp, q_factor ); // q_tmp1+q_C_re+q_recon_uu_re[0][0] + move32(); + q_recon_uu_re[0][0] = add( add( q_C_re, q_factor ), q_tmp1 ); + move16(); +#else q_recon_uu_re[0][0] = W_norm( W_tmp ); move16(); recon_uu_re[0][0] = W_extract_h( W_shl( W_tmp, q_recon_uu_re[0][0] ) ); // q_tmp1+q_C_re+q_recon_uu_re[0][0]-32 move32(); q_recon_uu_re[0][0] = sub( add( add( q_C_re, q_recon_uu_re[0][0] ), q_tmp1 ), 32 ); move16(); +#endif if ( W_tmp == 0 ) { q_recon_uu_re[0][0] = 31; @@ -2393,12 +2413,20 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[1][0], re1 ); // q_C_re+q_tmp1 +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( W_tmp ), 32 ); + recon_uu_re[0][1] = W_shl_sat_l( W_tmp, q_factor ); // q_C_re+q_tmp1+q_recon_uu_re[0][1] + move32(); + q_recon_uu_re[0][1] = add( add( q_C_re, q_factor ), q_tmp1 ); + move16(); +#else q_recon_uu_re[0][1] = W_norm( W_tmp ); move16(); recon_uu_re[0][1] = W_extract_h( W_shl( W_tmp, q_recon_uu_re[0][1] ) ); // q_C_re+q_tmp1+q_recon_uu_re[0][1]-32 move32(); q_recon_uu_re[0][1] = sub( add( add( q_C_re, q_recon_uu_re[0][1] ), q_tmp1 ), 32 ); move16(); +#endif if ( W_tmp == 0 ) { q_recon_uu_re[0][1] = 31; @@ -2406,12 +2434,20 @@ static void 
ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][0], re2 ); // q_C_re+q_tmp +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( W_tmp ), 32 ); + recon_uu_re[1][0] = W_shl_sat_l( W_tmp, q_factor ); // q_C_re+q_tmp+q_recon_uu_re[1][0] + move32(); + q_recon_uu_re[1][0] = add( add( q_C_re, q_factor ), q_tmp ); + move16(); +#else q_recon_uu_re[1][0] = W_norm( W_tmp ); move16(); recon_uu_re[1][0] = W_extract_h( W_shl( W_tmp, q_recon_uu_re[1][0] ) ); // q_C_re+q_tmp+q_recon_uu_re[1][0]-32 move32(); q_recon_uu_re[1][0] = sub( add( add( q_C_re, q_recon_uu_re[1][0] ), q_tmp ), 32 ); move16(); +#endif if ( W_tmp == 0 ) { q_recon_uu_re[1][0] = 31; @@ -2419,12 +2455,20 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[1][0], re2 ); // q_C_re+q_tmp +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( W_tmp ), 32 ); + recon_uu_re[1][1] = W_shl_sat_l( W_tmp, q_factor ); // q_C_re+q_tmp+q_recon_uu_re[1][1] + move32(); + q_recon_uu_re[1][1] = add( add( q_C_re, q_factor ), q_tmp ); + move16(); +#else q_recon_uu_re[1][1] = W_norm( W_tmp ); move16(); recon_uu_re[1][1] = W_extract_h( W_shl( W_tmp, q_recon_uu_re[1][1] ) ); // q_C_re+q_tmp+q_recon_uu_re[1][1]-32 move32(); q_recon_uu_re[1][1] = sub( add( add( q_C_re, q_recon_uu_re[1][1] ), q_tmp ), 32 ); move16(); +#endif if ( W_tmp == 0 ) { q_recon_uu_re[1][1] = 31; @@ -2441,12 +2485,18 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } } q_tmp = sub( s_min( q_tmp, q_cov_uu_re ), 1 ); - +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( q_cov_uu_re, q_tmp ); +#endif FOR( i = 0; i < 2; i++ ) { FOR( j = 0; j < 2; j++ ) { +#ifdef OPT_SBA_ENC_V2_BE + cov_uu_re[i][j] = L_sub( L_shr( cov_uu_re[i][j], q_factor ), L_shr( recon_uu_re[i][j], sub( q_recon_uu_re[i][j], q_tmp ) ) ); // q_tmp +#else cov_uu_re[i][j] = L_sub( L_shr( cov_uu_re[i][j], sub( q_cov_uu_re, q_tmp ) ), L_shr( recon_uu_re[i][j], sub( q_recon_uu_re[i][j], q_tmp ) ) ); 
// q_tmp +#endif move32(); } } @@ -2466,9 +2516,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( { Word32 re; W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][k], cov_dd_re[k][j] ); // q_C_re+q_cov_dd_re +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 33 ); + re = W_shl_sat_l( W_tmp, q_tmp ); // q_C_re+q_cov_dd_re+q_tmp + q_tmp = add( add( q_C_re, q_tmp ), q_cov_dd_re ); +#else q_tmp = sub( W_norm( W_tmp ), 1 ); re = W_extract_h( W_shl( W_tmp, q_tmp ) ); // q_C_re+q_cov_dd_re+q_tmp-32 q_tmp = sub( add( add( q_C_re, q_tmp ), q_cov_dd_re ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2492,9 +2548,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( } W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][0], re1[0] ); // q_C_re+q_re1[0] +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 33 ); + re2 = W_shl_sat_l( W_tmp, q_tmp ); // q_C_re+q_re1[0]+q_tmp + q_tmp = add( add( q_C_re, q_tmp ), q_re1[0] ); +#else q_tmp = sub( W_norm( W_tmp ), 1 ); re2 = W_extract_h( W_shl( W_tmp, q_tmp ) ); // q_C_re+q_re1[0]+q_tmp-32 q_tmp = sub( add( add( q_C_re, q_tmp ), q_re1[0] ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2504,9 +2566,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( move32(); W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[0][1], re1[1] ); // q_C_re+q_re1[1] +#ifdef OPT_SBA_ENC_V2_BE + q_tmp1 = sub( W_norm( W_tmp ), 33 ); + re2 = W_shl_sat_l( W_tmp, q_tmp1 ); // q_C_re+q_re1[1]+q_tmp1 + q_tmp1 = add( add( q_C_re, q_tmp1 ), q_re1[1] ); +#else q_tmp1 = sub( W_norm( W_tmp ), 1 ); re2 = W_extract_h( W_shl( W_tmp, q_tmp1 ) ); // q_C_re+q_re1[1]+q_tmp1-32 q_tmp1 = sub( add( add( q_C_re, q_tmp1 ), q_re1[1] ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp1 = 31; @@ -2585,9 +2653,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( FOR( k = 0; k < num_dmx - 1; k++ ) { W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[i][k], cov_dd_re[k][m] ); // q_C_re+q_cov_dd_re +#ifdef 
OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 34 ); + re = W_shl_sat_l( W_tmp, q_tmp ); // q_C_re+q_cov_dd_re+q_tmp + q_tmp = add( add( q_C_re, q_tmp ), q_cov_dd_re ); +#else q_tmp = sub( W_norm( W_tmp ), 2 ); re = W_extract_h( W_shl( W_tmp, q_tmp ) ); // q_C_re+q_cov_dd_re+q_tmp-32 q_tmp = sub( add( add( q_C_re, q_tmp ), q_cov_dd_re ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2627,9 +2701,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( FOR( m = 0; m < num_dmx - 1; m++ ) { W_tmp = W_mult0_32_32( pSparMd->band_coeffs[b_ts_idx].C_re_fx[j][m], re1[m] ); // q_C_re+q_re1[m] +#ifdef OPT_SBA_ENC_V2_BE + q_tmp = sub( W_norm( W_tmp ), 34 ); + re = W_shl_sat_l( W_tmp, q_tmp ); // q_C_re+q_re1[m]+q_tmp + q_tmp = add( add( q_C_re, q_tmp ), q_re1[m] ); +#else q_tmp = sub( W_norm( W_tmp ), 2 ); re = W_extract_h( W_shl( W_tmp, q_tmp ) ); // q_C_re+q_re1[m]+q_tmp-32 q_tmp = sub( add( add( q_C_re, q_tmp ), q_re1[m] ), 32 ); +#endif if ( W_tmp == 0 ) { q_tmp = 31; @@ -2714,9 +2794,15 @@ static void ivas_calc_p_coeffs_per_band_enc_fx( move16(); IF( trace != 0 ) { +#ifdef OPT_SBA_ENC_V2_BE + q_factor = sub( W_norm( trace ), 32 ); + tmp = Mpy_32_32( p_norm_scaling, W_shl_sat_l( trace, q_factor ) ); // q_cov_uu_re+q_factor + q_factor = add( q_cov_uu_re, q_factor ); +#else q_factor = W_norm( trace ); tmp = Mpy_32_32( p_norm_scaling, W_extract_h( W_shl( trace, q_factor ) ) ); // q_cov_uu_re+q_factor-32 q_factor = sub( add( q_cov_uu_re, q_factor ), 32 ); +#endif IF( GT_16( q_factor, q_postpred_cov_re ) ) { tmp = L_shr( tmp, sub( q_factor, q_postpred_cov_re ) ); // q_postpred_cov_re diff --git a/lib_com/options.h b/lib_com/options.h index ec5b35f46..7996f8d41 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -80,6 +80,7 @@ #define OPT_SBA_REND_V1_BE #define OPT_HEAD_ROT_REND_V1_BE #define OPT_SBA_DEC_V2_BE +#define OPT_SBA_ENC_V2_BE #define OPT_SBA_ENC_V1_BE #define OPT_BIN_RENDERER_V1 #define OPT_BIN_RENDERER_V2 diff --git a/lib_enc/ACcontextMapping_enc_fx.c 
b/lib_enc/ACcontextMapping_enc_fx.c index 5003712b7..0bbc78912 100644 --- a/lib_enc/ACcontextMapping_enc_fx.c +++ b/lib_enc/ACcontextMapping_enc_fx.c @@ -1249,6 +1249,10 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( k = 1; move16(); +#ifdef OPT_SBA_ENC_V2_BE + Word16 round_bit_estimate_fx; +#endif + WHILE( LT_16( k, nt / 2 ) ) { bit_estimate_fx = W_add( bit_estimate_fx, MAKE_NUMBER_QX( 1, Q23 ) ); @@ -1334,7 +1338,7 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( ctx = &c[L_or( p1, p2 )]; t = (UWord16) L_add( *ctx, rateFlag ); - IF( LT_16( nt_half, idx ) ) + if ( LT_16( nt_half, idx ) ) { t = add( t, ( 1 << NBITS_CONTEXT ) ); } @@ -1351,6 +1355,19 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* check while condition */ /* MSBs coding */ +#ifdef OPT_SBA_ENC_V2_BE + FOR( ; s_max( a1, b1 ) >= A_THRES; ) + { + pki = lookup[lev1]; /* ESC symbol */ + + bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][VAL_ESC] ); + bit_estimate_fx = W_add( bit_estimate_fx, MAKE_VARIABLE_QX( 2, Q23 ) ); + a1 = shr( a1, 1 ); + b1 = shr( b1, 1 ); + + lev1 = s_min( add( lev1, ( 1 << ( NBITS_CONTEXT + NBITS_RATEQ ) ) ), 2 << ( NBITS_CONTEXT + NBITS_RATEQ ) ); + } +#else WHILE( GE_16( s_max( a1, b1 ), A_THRES ) ) { pki = lookup[lev1]; /* ESC symbol */ @@ -1364,14 +1381,18 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* check while condition */ } - +#endif pki = lookup[lev1]; symbol = add( a1, i_mult( A_THRES, b1 ) ); /* Q0 */ bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][symbol] ); +#ifdef OPT_SBA_ENC_V2_BE + IF( GT_64( bit_estimate_fx, W_shl( target, Q23 ) ) ) // Q23 +#else /* Should we truncate? 
*/ IF( GT_32( W_extract_l( W_shr( bit_estimate_fx, Q8 ) ), L_shl( target, Q15 ) ) ) +#endif { stop2 = 1; move16(); @@ -1393,6 +1414,13 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( { lev1 = shr( lev1, NBITS_CONTEXT + NBITS_RATEQ ); +#ifdef OPT_SBA_ENC_V2_BE + t = add( 13, lev1 ); + IF( lev1 <= 0 ) + { + t = add( 1, i_mult( add( a1, b1 ), add( lev1, 2 ) ) ); + } +#else IF( lev1 <= 0 ) { t = add( 1, i_mult( add( a1, b1 ), add( lev1, 2 ) ) ); @@ -1401,6 +1429,7 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( { t = add( 13, lev1 ); } +#endif *ctx = L_add( imult3216( L_and( *ctx, 0xf ), 16 ), t ); move32(); @@ -1425,15 +1454,21 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( } } /*end of the 2-tuples loop*/ - - total_output_bits = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ - +#ifdef OPT_SBA_ENC_V2_BE + total_output_bits = round_fx( W_shl_sat_l( bit_estimate_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ +#else + total_output_bits = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ +#endif IF( *stop ) { - total_output_bits = round_fx( W_extract_l( W_shr( nbits2_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ +#ifdef OPT_SBA_ENC_V2_BE + total_output_bits = round_fx( W_shl_sat_l( nbits2_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ +#else + total_output_bits = round_fx( W_extract_l( W_shr( nbits2_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ +#endif } - IF( stop2 ) + if ( stop2 ) { stop2 = total_output_bits; /* Q0 */ move16(); @@ -1455,8 +1490,11 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( hm_cfg->numPeakIndices = numPeakIndicesOrig; /* Q0 */ move16(); - +#ifdef OPT_SBA_ENC_V2_BE + return round_fx( L_add( W_shl_sat_l( nbits2_fx, -Q7 ), ONE_IN_Q14 ) ); /* Q0 */ +#else return round_fx( L_add( W_extract_l( W_shr( nbits2_fx, Q7 ) ), ONE_IN_Q14 ) ); /* Q0 */ +#endif } ELSE /* if (!hm_cfg) */ { @@ -1530,6 +1568,21 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* check 
while condition */ /* MSBs coding */ +#ifdef OPT_SBA_ENC_V2_BE + FOR( ; s_max( a1, b1 ) >= A_THRES; ) + { + pki = lookup[( esc_nb << ( NBITS_CONTEXT + NBITS_RATEQ ) )]; /* Q0 */ + + bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][VAL_ESC] ); + bit_estimate_fx = W_add( bit_estimate_fx, MAKE_NUMBER_QX( 2, Q23 ) ); + + a1 = shr( a1, 1 ); + b1 = shr( b1, 1 ); + + lev1 = add( lev1, 1 ); + esc_nb = s_min( lev1, 3 ); + } +#else WHILE( GE_16( s_max( a1, b1 ), A_THRES ) ) { pki = lookup[( esc_nb << ( NBITS_CONTEXT + NBITS_RATEQ ) )]; /* Q0 */ @@ -1546,15 +1599,18 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* check while condition */ } - +#endif pki = lookup[( esc_nb << ( NBITS_CONTEXT + NBITS_RATEQ ) )]; /* Q0 */ - move16(); symbol = add( a1, i_mult( A_THRES, b1 ) ); /* Q0 */ bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][symbol] ); /* Should we truncate? */ +#ifdef OPT_SBA_ENC_V2_BE + IF( GT_64( bit_estimate_fx, W_shl( target, Q23 ) ) ) // Q23 +#else IF( GT_32( W_extract_l( W_shr( bit_estimate_fx, Q8 ) ), L_shl( target, Q15 ) ) ) +#endif { overflow_flag = 1; move16(); @@ -1570,6 +1626,14 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( } } +#ifdef OPT_SBA_ENC_V2_BE + /* Update context for next 2-tuple */ + cp = add( 1, i_mult( add( a1, b1 ), add( esc_nb, 1 ) ) ); /* Q0 */ + if ( GE_16( esc_nb, 2 ) ) + { + cp = add( 12, esc_nb ); /* Q0 */ + } +#else /* Update context for next 2-tuple */ IF( LT_16( esc_nb, 2 ) ) { @@ -1579,59 +1643,94 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( { cp = add( 12, esc_nb ); /* Q0 */ } +#endif /*shift old bits and replace last 4 bits*/ s = (UWord16) L_add( L_shl( s, 4 ), cp ); t = s_and( s, 0xFF ); - } /*end of the 2-tuples loop*/ +#ifdef OPT_SBA_ENC_V2_BE + tot_bits2 = round_fx( W_shl_sat_l( nbits2_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ + round_bit_estimate_fx = round_fx( W_shl_sat_l( bit_estimate_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ +#else 
tot_bits2 = round_fx( W_extract_l( W_shr( nbits2_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ - IF( LT_16( lastnz2, lastnz ) ) /* Overflow occured because unable to code all tuples */ +#endif + if ( LT_16( lastnz2, lastnz ) ) /* Overflow occured because unable to code all tuples */ { overflow_flag = 1; move16(); } +#ifdef OPT_SBA_ENC_V2_BE + if ( EQ_16( mode, -1 ) ) + { + tot_bits2 = round_bit_estimate_fx; + move16(); + } +#else IF( EQ_16( mode, -1 ) ) { - tot_bits2 = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ + tot_bits2 = round_fx( W_shl_sat_l( bit_estimate_fx, -Q7 ) ); /* Q23 -> Q16 -> Q0 */ + } +#endif +#ifdef OPT_SBA_ENC_V2_BE + if ( overflow_flag == 0 ) /* No overflow */ + { + *stop = 0; + move16(); } + IF( overflow_flag != 0 ) /* Overflow */ + { + IF( *stop ) + { + *stop = tot_bits2; /* Q0 */ + move16(); + } + ELSE + { + *stop = round_bit_estimate_fx; + move16(); + } + } +#else IF( overflow_flag == 0 ) /* No overflow */ { *stop = 0; move16(); } ELSE /* Overflow */ + { + IF( *stop ) { - IF( *stop ){ - *stop = tot_bits2; /* Q0 */ + *stop = tot_bits2; /* Q0 */ + move16(); + } + ELSE + { + *stop = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ + move16(); + } + } +#endif + + *lastnz_out = lastnz; /* Q0 */ move16(); - } - ELSE - { - *stop = round_fx( W_extract_l( W_shr( bit_estimate_fx, Q7 ) ) ); /* Q23 -> Q16 -> Q0 */ + *nEncoded = lastnz2; /* Q0 */ move16(); - } -} + /* Safety mechanism to avoid overflow */ + test(); + IF( EQ_16( lastnz2, 2 ) && EQ_16( overflow_flag, 1 ) ) + { + FOR( k = 0; k < lastnz2; k++ ) + { + x[k] = 0; + move16(); + } + } -*lastnz_out = lastnz; /* Q0 */ -move16(); -*nEncoded = lastnz2; /* Q0 */ -move16(); -/* Safety mechanism to avoid overflow */ -test(); -IF( EQ_16( lastnz2, 2 ) && EQ_16( overflow_flag, 1 ) ) -{ - FOR( k = 0; k < lastnz2; k++ ) - { - x[k] = 0; - move16(); + return tot_bits2; } } -return tot_bits2; -} -} - 
/*-------------------------------------------------------------------* * RCcontextMapping_encode2_estimate_bandWise_start_fx() @@ -1743,6 +1842,15 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( /* Get context */ t = add( hContextMem->ctx, hContextMem->rateFlag ); /* Q0 */ +#ifdef OPT_SBA_ENC_V2_BE + tmp = ( 1 << NBITS_CONTEXT ); + move16(); + if ( GE_16( hContextMem->nt_half, idx ) ) + { + tmp = 0; + move16(); + } +#else IF( GE_16( hContextMem->nt_half, idx ) ) { tmp = 0; @@ -1751,6 +1859,7 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( { tmp = ( 1 << NBITS_CONTEXT ); } +#endif t = add( t, tmp ); /* Q0 */ @@ -1771,15 +1880,18 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( /* check while condition */ /* MSBs coding */ +#ifdef OPT_SBA_ENC_V2_BE + FOR( ; s_max( a1, b1 ) >= A_THRES; ) +#else WHILE( GE_16( s_max( a1, b1 ), A_THRES ) ) +#endif { pki = lookup[lev1]; /* Q0 */ - move16(); hContextMem->bit_estimate_fx = W_add( hContextMem->bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][VAL_ESC] ); hContextMem->bit_estimate_fx = W_add( hContextMem->bit_estimate_fx, MAKE_NUMBER_QX( 2, Q23 ) ); - move32(); - move32(); + move64(); + move64(); // hContextMem->bit_estimate = hContextMem->bit_estimate + ari_bit_estimate_s17_LC[pki][VAL_ESC]; @@ -1789,20 +1901,27 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( b1 = shr( b1, 1 ); lev1 = s_min( add( lev1, ( 1 << ( NBITS_CONTEXT + NBITS_RATEQ ) ) ), 2 << ( NBITS_CONTEXT + NBITS_RATEQ ) ); /* Q0 */ - /* check while condition */ + /* check while condition */ } pki = lookup[lev1]; /* Q0 */ - move16(); + symbol = add( a1, i_mult( A_THRES, b1 ) ); /* MSB symbol Q0*/ hContextMem->bit_estimate_fx = W_add( hContextMem->bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][symbol] ); - move32(); + move64(); // hContextMem->bit_estimate = hContextMem->bit_estimate + ari_bit_estimate_s17_LC[pki][symbol]; /* Update context */ lev1 = shr( lev1, NBITS_CONTEXT + NBITS_RATEQ ); +#ifdef OPT_SBA_ENC_V2_BE + t = add( 
1, i_mult( add( a1, b1 ), add( lev1, 2 ) ) ); /* Q0 */ + if ( lev1 > 0 ) + { + t = add( 13, lev1 ); /* Q0 */ + } +#else IF( lev1 <= 0 ) { t = add( 1, i_mult( add( a1, b1 ), add( lev1, 2 ) ) ); /* Q0 */ @@ -1811,12 +1930,16 @@ Word16 RCcontextMapping_encode2_estimate_bandWise_fx( { t = add( 13, lev1 ); /* Q0 */ } - +#endif hContextMem->ctx = add( i_mult( s_and( hContextMem->ctx, 0xf ), 16 ), t ); /* Q0 */ move16(); - } /*end of the 2-tuples loop*/ + } /*end of the 2-tuples loop*/ +#ifdef OPT_SBA_ENC_V2_BE + total_output_bits = round_fx( W_shl_sat_l( hContextMem->bit_estimate_fx, -Q7 ) ); /* Q0 */ +#else total_output_bits = round_fx( W_extract_l( W_shr( hContextMem->bit_estimate_fx, Q7 ) ) ); /* Q0 */ +#endif // total_output_bits = (Word16) ( hContextMem->bit_estimate + 0.5f ); -- GitLab From 3d8604007ee24a30d923c1e6a0c491327479a860 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 17 Jun 2025 17:12:13 +0530 Subject: [PATCH 2/2] Fix for SSNR deviation observed for one STV case --- lib_enc/ACcontextMapping_enc_fx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib_enc/ACcontextMapping_enc_fx.c b/lib_enc/ACcontextMapping_enc_fx.c index 0bbc78912..a7944410b 100644 --- a/lib_enc/ACcontextMapping_enc_fx.c +++ b/lib_enc/ACcontextMapping_enc_fx.c @@ -1251,6 +1251,7 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( #ifdef OPT_SBA_ENC_V2_BE Word16 round_bit_estimate_fx; + Word32 target_Q15 = L_shl( target, Q15 ); // Q15 #endif WHILE( LT_16( k, nt / 2 ) ) @@ -1388,7 +1389,7 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( bit_estimate_fx = W_add( bit_estimate_fx, ari_bit_estimate_s17_LC_fx[pki][symbol] ); #ifdef OPT_SBA_ENC_V2_BE - IF( GT_64( bit_estimate_fx, W_shl( target, Q23 ) ) ) // Q23 + IF( GT_32( W_shl_sat_l( bit_estimate_fx, -Q8 ), target_Q15 ) ) // Q15 #else /* Should we truncate? 
*/ IF( GT_32( W_extract_l( W_shr( bit_estimate_fx, Q8 ) ), L_shl( target, Q15 ) ) ) @@ -1607,7 +1608,7 @@ Word16 RCcontextMapping_encode2_estimate_no_mem_s17_LCS_fx( /* Should we truncate? */ #ifdef OPT_SBA_ENC_V2_BE - IF( GT_64( bit_estimate_fx, W_shl( target, Q23 ) ) ) // Q23 + IF( GT_32( W_shl_sat_l( bit_estimate_fx, -Q8 ), target_Q15 ) ) // Q15 #else IF( GT_32( W_extract_l( W_shr( bit_estimate_fx, Q8 ) ), L_shl( target, Q15 ) ) ) #endif -- GitLab