diff --git a/lib_com/ivas_dirac_com_fx.c b/lib_com/ivas_dirac_com_fx.c index 14dd6c1484c1b454c76081e00de84f7aeabd1864..befeb273c5a7a7cd86e706e13f1e16ffaaf9f4f8 100644 --- a/lib_com/ivas_dirac_com_fx.c +++ b/lib_com/ivas_dirac_com_fx.c @@ -1112,6 +1112,17 @@ void calculate_hodirac_sector_parameters_fx( Word16 i_sec, i_bin, i_band; Word32 p_real_fx, p_imag_fx, normI_fx, energy_fx, tmp_diff_fx; Word16 energy_exp, normI_exp, tmp_diff_exp; +#ifdef OPT_SBA_ENC_V2_NBE + Word16 tmp_exp_1 = sub( 33, shl( Qfac, 1 ) ); // 31 - (2 *Qfac - 2 ) + Word16 tmp_exp_2 = sub( 35, shl( Qfac, 1 ) ); // 31 - (2 *Qfac - 4 ) + Word32 tmp32_1, tmp32_2; + Word64 temp_x64 = 0, temp_y64 = 0, temp_z64 = 0; + Word16 tmp_scale = 0; + move64(); + move64(); + move64(); + move16(); +#endif Word32 sec_I_vec_x_fx[NUM_ANA_SECTORS]; Word32 sec_I_vec_y_fx[NUM_ANA_SECTORS]; @@ -1185,7 +1196,16 @@ void calculate_hodirac_sector_parameters_fx( move32(); energy_exp = 0; move16(); - +#ifdef OPT_SBA_ENC_V2_NBE + Word64 sec_I_vec_x_64_fx = 0; + Word64 sec_I_vec_y_64_fx = 0; + Word64 sec_I_vec_z_64_fx = 0; + Word64 energy_64_fx = 0; + move64(); + move64(); + move64(); + move64(); +#endif IF( i_sec == 0 ) { FOR( i_bin = band_grouping[i_band]; i_bin < band_grouping[i_band + 1]; i_bin++ ) @@ -1194,7 +1214,18 @@ void calculate_hodirac_sector_parameters_fx( move32(); Word32 sec_w_imag_fx, sec_x_imag_fx, sec_y_imag_fx, sec_z_imag_fx; Word32 sec_w_real_fx, sec_x_real_fx, sec_y_real_fx, sec_z_real_fx; - +#ifdef OPT_SBA_ENC_V2_NBE + sec_w_imag_fx = Madd_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_0_fx ) ), HODIRAC_FAC1, *( p_ImagBuffer_1_fx ) ); // Qfac - 2 + sec_x_imag_fx = Madd_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_3_fx++ ) ), HODIRAC_FAC2, *( p_ImagBuffer_4_fx++ ) ); // Qfac - 2 + sec_y_imag_fx = Msub_32_32( ( Msub_32_32( ( Madd_32_32( Mpy_32_32( HODIRAC_FAC3, *( p_ImagBuffer_0_fx++ ) ), HODIRAC_FAC1, *( p_ImagBuffer_1_fx++ ) ) ), HODIRAC_FAC3, *( p_ImagBuffer_6_fx++ ) ) ), HODIRAC_FAC2, *( p_ImagBuffer_8_fx++ ) ); // Qfac - 2 + sec_z_imag_fx = Madd_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_2_fx++ ) ), HODIRAC_FAC2, *( p_ImagBuffer_5_fx++ ) ); // Qfac - 2 + + sec_w_real_fx = Madd_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_0_fx ) ), HODIRAC_FAC1, *( p_RealBuffer_1_fx ) ); // Qfac - 2 + sec_x_real_fx = Madd_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_3_fx++ ) ), HODIRAC_FAC2, *( p_RealBuffer_4_fx++ ) ); // Qfac - 2 + sec_y_real_fx = Msub_32_32( ( Msub_32_32( Madd_32_32( Mpy_32_32( HODIRAC_FAC3, *( p_RealBuffer_0_fx++ ) ), HODIRAC_FAC1, *( p_RealBuffer_1_fx++ ) ), HODIRAC_FAC3, *( p_RealBuffer_6_fx++ ) ) ), HODIRAC_FAC2, *( p_RealBuffer_8_fx++ ) ); // Qfac - 2 + sec_z_real_fx = Madd_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_2_fx++ ) ), HODIRAC_FAC2, *( p_RealBuffer_5_fx++ ) ); // Qfac - 2 + +#else sec_w_imag_fx = L_add( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_0_fx ) ), Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_1_fx ) ) ); // Qfac - 2 sec_x_imag_fx = L_add( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_3_fx++ ) ), Mpy_32_32( HODIRAC_FAC2, *( p_ImagBuffer_4_fx++ ) ) ); // Qfac - 2 sec_y_imag_fx = L_sub( L_sub( L_add( Mpy_32_32( HODIRAC_FAC3, *( p_ImagBuffer_0_fx++ ) ), Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_1_fx++ ) ) ), Mpy_32_32( HODIRAC_FAC3, *( p_ImagBuffer_6_fx++ ) ) ), Mpy_32_32( HODIRAC_FAC2, *( p_ImagBuffer_8_fx++ ) ) ); // Qfac - 2 @@ -1204,14 +1235,17 @@ void calculate_hodirac_sector_parameters_fx( sec_x_real_fx = L_add( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_3_fx++ ) ), Mpy_32_32( HODIRAC_FAC2, *( p_RealBuffer_4_fx++ ) ) ); // Qfac - 2 sec_y_real_fx = L_sub( L_sub( L_add( Mpy_32_32( HODIRAC_FAC3, *( p_RealBuffer_0_fx++ ) ), Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_1_fx++ ) ) ), Mpy_32_32( HODIRAC_FAC3, *( p_RealBuffer_6_fx++ ) ) ), Mpy_32_32( HODIRAC_FAC2, *( p_RealBuffer_8_fx++ ) ) ); // Qfac - 2 sec_z_real_fx = L_add( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_2_fx++ ) ), Mpy_32_32( HODIRAC_FAC2, *( p_RealBuffer_5_fx++ ) ) ); // Qfac - 2 +#endif +#ifndef OPT_SBA_ENC_V2_NBE Word16 p_q; Word32 tmp_x, tmp_y, tmp_z; Word16 n, n1, n2; - +#endif p_real_fx = Mpy_32_32( sec_w_real_fx, w_fx ); // ( Qfac - 2 ) + 30 - 31 = Qfac - 3 p_imag_fx = Mpy_32_32( sec_w_imag_fx, w_fx ); // ( Qfac - 2 ) + 30 - 31 = Qfac - 3 +#ifndef OPT_SBA_ENC_V2_NBE n1 = norm_l( p_real_fx ); n2 = norm_l( p_imag_fx ); @@ -1234,13 +1268,35 @@ void calculate_hodirac_sector_parameters_fx( move32(); *p_sec_I_vec_z_fx = BASOP_Util_Add_Mant32Exp( *p_sec_I_vec_z_fx, *p_sec_I_vec_z_exp, tmp_z, tmp_exp, p_sec_I_vec_z_exp ); move32(); - - Word32 tmp1; +#else + temp_x64 = W_mac_32_32( W_mult_32_32( p_real_fx, sec_x_real_fx ), p_imag_fx, sec_x_imag_fx ); // ( Qfac - 3 ) + ( Qfac - 2 ) + 1 = 2 * Qfac - 4 + temp_y64 = W_mac_32_32( W_mult_32_32( p_real_fx, sec_y_real_fx ), p_imag_fx, sec_y_imag_fx ); // ( Qfac - 3 ) + ( Qfac - 2 ) + 1 = 2 * Qfac - 4 + temp_z64 = W_mac_32_32( W_mult_32_32( p_real_fx, sec_z_real_fx ), p_imag_fx, sec_z_imag_fx ); // ( Qfac - 3 ) + ( Qfac - 2 ) + 1 = 2 * Qfac - 4 + sec_I_vec_x_64_fx = W_add( sec_I_vec_x_64_fx, temp_x64 ); + sec_I_vec_y_64_fx = W_add( sec_I_vec_y_64_fx, temp_y64 ); + sec_I_vec_z_64_fx = W_add( sec_I_vec_z_64_fx, temp_z64 ); + + Word64 tmp1; +#endif Word64 tmp2, tmp3, tmp4, sec_sum64; +#ifndef OPT_SBA_ENC_V2_NBE + Word32 tmp1; Word32 tmp5, sec_sum; tmp_exp = sub( 62, add( p_q, p_q ) ); tmp1 = BASOP_Util_Add_Mant32Exp( Mpy_32_32( p_real_fx, p_real_fx ), tmp_exp, Mpy_32_32( p_imag_fx, p_imag_fx ), tmp_exp, &tmp_exp ); +#endif +#ifdef OPT_SBA_ENC_V2_NBE + tmp1 = W_mac_32_32( W_mult_32_32( p_real_fx, p_real_fx ), p_imag_fx, p_imag_fx ); // 2 * (Qfac - 3) + 1 + tmp1 = W_shl( tmp1, 2 ); // 2 * (Qfac - 2) + 1 + tmp2 = W_mac_32_32( W_mult_32_32( sec_x_real_fx, sec_x_real_fx ), sec_x_imag_fx, sec_x_imag_fx ); // 2 * (Qfac - 2) + 1 + tmp3 = W_mac_32_32( W_mult_32_32( sec_y_real_fx, sec_y_real_fx ), sec_y_imag_fx, sec_y_imag_fx ); // 2 * (Qfac - 2) + 1 + tmp4 = W_mac_32_32( W_mult_32_32( sec_z_real_fx, sec_z_real_fx ), sec_z_imag_fx, sec_z_imag_fx ); // 2 * (Qfac - 2) + 1 + sec_sum64 = W_add( tmp1, W_add( W_add( tmp2, tmp3 ), tmp4 ) ); // 2 * (Qfac - 2) + 1 + + // instead dividing changed Q// + energy_64_fx = W_add( energy_64_fx, sec_sum64 ); // 2 * (Qfac - 2) + 1 + 1 +#else tmp2 = W_add( W_mult0_32_32( sec_x_real_fx, sec_x_real_fx ), W_mult0_32_32( sec_x_imag_fx, sec_x_imag_fx ) ); // 2 * (Qfac - 2) tmp3 = W_add( W_mult0_32_32( sec_y_real_fx, sec_y_real_fx ), W_mult0_32_32( sec_y_imag_fx, sec_y_imag_fx ) ); // 2 * (Qfac - 2) tmp4 = W_add( W_mult0_32_32( sec_z_real_fx, sec_z_real_fx ), W_mult0_32_32( sec_z_imag_fx, sec_z_imag_fx ) ); // 2 * (Qfac - 2) @@ -1256,7 +1312,51 @@ void calculate_hodirac_sector_parameters_fx( tmp5 = BASOP_Util_Add_Mant32Exp( tmp1, tmp_exp, sec_sum, sec_sum_exp, &tmp_exp ); energy_fx = BASOP_Util_Add_Mant32Exp( energy_fx, energy_exp, tmp5, tmp_exp, &energy_exp ); +#endif + } +#ifdef OPT_SBA_ENC_V2_NBE + tmp_scale = sub( W_norm( energy_64_fx ), 32 ); + energy_fx = W_shl_sat_l( energy_64_fx, tmp_scale ); + energy_exp = sub( tmp_exp_1, tmp_scale ); + if ( energy_fx == 0 ) + { + energy_exp = 0; + move16(); + } + + tmp_scale = sub( W_norm( sec_I_vec_x_64_fx ), 32 ); + *p_sec_I_vec_x_fx = W_shl_sat_l( sec_I_vec_x_64_fx, tmp_scale ); + move32(); + *p_sec_I_vec_x_exp = sub( tmp_exp_2, tmp_scale ); + move16(); + if ( *p_sec_I_vec_x_fx == 0 ) + { + *p_sec_I_vec_x_exp = 0; + move16(); + } + + tmp_scale = sub( W_norm( sec_I_vec_y_64_fx ), 32 ); + *p_sec_I_vec_y_fx = W_shl_sat_l( sec_I_vec_y_64_fx, tmp_scale ); + move32(); + *p_sec_I_vec_y_exp = sub( tmp_exp_2, tmp_scale ); + move16(); + if ( *p_sec_I_vec_y_fx == 0 ) + { + *p_sec_I_vec_y_exp = 0; + move16(); + } + + tmp_scale = sub( W_norm( sec_I_vec_z_64_fx ), 32 ); + *p_sec_I_vec_z_fx = W_shl_sat_l( sec_I_vec_z_64_fx, tmp_scale ); + move32(); + *p_sec_I_vec_z_exp = sub( tmp_exp_2, tmp_scale ); + move16(); + if ( *p_sec_I_vec_z_fx == 0 ) + { + *p_sec_I_vec_z_exp = 0; + move16(); } +#endif } ELSE { @@ -1267,6 +1367,18 @@ void calculate_hodirac_sector_parameters_fx( Word32 sec_w_imag_fx, sec_x_imag_fx, sec_y_imag_fx, sec_z_imag_fx; Word32 sec_w_real_fx, sec_x_real_fx, sec_y_real_fx, sec_z_real_fx; +#ifdef OPT_SBA_ENC_V2_NBE + sec_w_imag_fx = Msub_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_0_fx ) ), HODIRAC_FAC1, *( p_ImagBuffer_1_fx ) ); // Qfac - 2 + sec_x_imag_fx = Msub_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_3_fx++ ) ), HODIRAC_FAC2, *( p_ImagBuffer_4_fx++ ) ); // Qfac - 2 + sec_y_imag_fx = Madd_32_32( ( Madd_32_32( ( Madd_32_32( Mpy_32_32( -HODIRAC_FAC3, *( p_ImagBuffer_0_fx++ ) ), HODIRAC_FAC1, *( p_ImagBuffer_1_fx++ ) ) ), HODIRAC_FAC3, *( p_ImagBuffer_6_fx++ ) ) ), HODIRAC_FAC2, *( p_ImagBuffer_8_fx++ ) ); // Qfac - 2 + sec_z_imag_fx = Msub_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_2_fx++ ) ), HODIRAC_FAC2, *( p_ImagBuffer_5_fx++ ) ); // Qfac - 2 + + sec_w_real_fx = Msub_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_0_fx ) ), HODIRAC_FAC1, *( p_RealBuffer_1_fx ) ); // Qfac - 2 + sec_x_real_fx = Msub_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_3_fx++ ) ), HODIRAC_FAC2, *( p_RealBuffer_4_fx++ ) ); // Qfac - 2 + sec_y_real_fx = Madd_32_32( ( Madd_32_32( ( Madd_32_32( Mpy_32_32( -HODIRAC_FAC3, *( p_RealBuffer_0_fx++ ) ), HODIRAC_FAC1, *( p_RealBuffer_1_fx++ ) ) ), HODIRAC_FAC3, *( p_RealBuffer_6_fx++ ) ) ), HODIRAC_FAC2, *( p_RealBuffer_8_fx++ ) ); // Qfac - 2 + sec_z_real_fx = Msub_32_32( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_2_fx++ ) ), HODIRAC_FAC2, *( p_RealBuffer_5_fx++ ) ); // Qfac - 2 + +#else sec_w_imag_fx = L_sub( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_0_fx ) ), Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_1_fx ) ) ); // Qfac - 2 sec_x_imag_fx = L_sub( Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_3_fx++ ) ), Mpy_32_32( HODIRAC_FAC2, *( p_ImagBuffer_4_fx++ ) ) ); // Qfac - 2 sec_y_imag_fx = L_add( L_add( L_add( Mpy_32_32( -HODIRAC_FAC3, *( p_ImagBuffer_0_fx++ ) ), Mpy_32_32( HODIRAC_FAC1, *( p_ImagBuffer_1_fx++ ) ) ), Mpy_32_32( HODIRAC_FAC3, *( p_ImagBuffer_6_fx++ ) ) ), Mpy_32_32( HODIRAC_FAC2, *( p_ImagBuffer_8_fx++ ) ) ); // Qfac - 2 @@ -1276,20 +1388,21 @@ void calculate_hodirac_sector_parameters_fx( sec_x_real_fx = L_sub( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_3_fx++ ) ), Mpy_32_32( HODIRAC_FAC2, *( p_RealBuffer_4_fx++ ) ) ); // Qfac - 2 sec_y_real_fx = L_add( L_add( L_add( Mpy_32_32( -HODIRAC_FAC3, *( p_RealBuffer_0_fx++ ) ), Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_1_fx++ ) ) ), Mpy_32_32( HODIRAC_FAC3, *( p_RealBuffer_6_fx++ ) ) ), Mpy_32_32( HODIRAC_FAC2, *( p_RealBuffer_8_fx++ ) ) ); // Qfac - 2 sec_z_real_fx = L_sub( Mpy_32_32( HODIRAC_FAC1, *( p_RealBuffer_2_fx++ ) ), Mpy_32_32( HODIRAC_FAC2, *( p_RealBuffer_5_fx++ ) ) ); // Qfac - 2 - +#endif +#ifndef OPT_SBA_ENC_V2_NBE Word16 p_q; + Word32 tmp_x, tmp_y, tmp_z; + Word16 n, n1, n2; +#endif p_real_fx = Mpy_32_32( sec_w_real_fx, w_fx ); // ( Qfac - 2 ) + 30 - 31 = Qfac - 3 p_imag_fx = Mpy_32_32( sec_w_imag_fx, w_fx ); // ( Qfac - 2 ) + 30 - 31 = Qfac - 3 - Word32 tmp_x, tmp_y, tmp_z; - Word16 n, n1, n2; - +#ifndef OPT_SBA_ENC_V2_NBE n1 = norm_l( p_real_fx ); n2 = norm_l( p_imag_fx ); n = s_min( n1, n2 ); - p_real_fx = L_shl( p_real_fx, n ); p_imag_fx = L_shl( p_imag_fx, n ); @@ -1307,13 +1420,35 @@ void calculate_hodirac_sector_parameters_fx( move32(); *p_sec_I_vec_z_fx = BASOP_Util_Add_Mant32Exp( *p_sec_I_vec_z_fx, *p_sec_I_vec_z_exp, tmp_z, tmp_exp, p_sec_I_vec_z_exp ); move32(); - - Word32 tmp1; +#else + temp_x64 = W_mac_32_32( W_mult_32_32( p_real_fx, sec_x_real_fx ), p_imag_fx, sec_x_imag_fx ); // ( Qfac - 3 ) + ( Qfac - 2 ) + 1 = 2 * Qfac - 4 + temp_y64 = W_mac_32_32( W_mult_32_32( p_real_fx, sec_y_real_fx ), p_imag_fx, sec_y_imag_fx ); // ( Qfac - 3 ) + ( Qfac - 2 ) + 1 = 2 * Qfac - 4 + temp_z64 = W_mac_32_32( W_mult_32_32( p_real_fx, sec_z_real_fx ), p_imag_fx, sec_z_imag_fx ); // ( Qfac - 3 ) + ( Qfac - 2 ) + 1 = 2 * Qfac - 4 + sec_I_vec_x_64_fx = W_add( sec_I_vec_x_64_fx, temp_x64 ); + sec_I_vec_y_64_fx = W_add( sec_I_vec_y_64_fx, temp_y64 ); + sec_I_vec_z_64_fx = W_add( sec_I_vec_z_64_fx, temp_z64 ); + + Word64 tmp1; +#endif Word64 tmp2, tmp3, tmp4, sec_sum64; +#ifndef OPT_SBA_ENC_V2_NBE + Word32 tmp1; Word32 tmp5, sec_sum; tmp_exp = sub( 62, add( p_q, p_q ) ); tmp1 = BASOP_Util_Add_Mant32Exp( Mpy_32_32( p_real_fx, p_real_fx ), tmp_exp, Mpy_32_32( p_imag_fx, p_imag_fx ), tmp_exp, &tmp_exp ); +#endif +#ifdef OPT_SBA_ENC_V2_NBE + tmp1 = W_mac_32_32( W_mult_32_32( p_real_fx, p_real_fx ), p_imag_fx, p_imag_fx ); // 2 * (Qfac - 3) + 1 + tmp1 = W_shl( tmp1, 2 ); // 2 * (Qfac - 2) + 1 + tmp2 = W_mac_32_32( W_mult_32_32( sec_x_real_fx, sec_x_real_fx ), sec_x_imag_fx, sec_x_imag_fx ); // 2 * (Qfac - 2) + 1 + tmp3 = W_mac_32_32( W_mult_32_32( sec_y_real_fx, sec_y_real_fx ), sec_y_imag_fx, sec_y_imag_fx ); // 2 * (Qfac - 2) + 1 + tmp4 = W_mac_32_32( W_mult_32_32( sec_z_real_fx, sec_z_real_fx ), sec_z_imag_fx, sec_z_imag_fx ); // 2 * (Qfac - 2) + 1 + sec_sum64 = W_add( tmp1, W_add( W_add( tmp2, tmp3 ), tmp4 ) ); // 2 * (Qfac - 2) + 1 + + // instead dividing changed Q// + energy_64_fx = W_add( energy_64_fx, sec_sum64 ); // 2 * (Qfac - 2) + 1 + 1 +#else tmp2 = W_add( W_mult0_32_32( sec_x_real_fx, sec_x_real_fx ), W_mult0_32_32( sec_x_imag_fx, sec_x_imag_fx ) ); // 2 * (Qfac - 2) tmp3 = W_add( W_mult0_32_32( sec_y_real_fx, sec_y_real_fx ), W_mult0_32_32( sec_y_imag_fx, sec_y_imag_fx ) ); // 2 * (Qfac - 2) tmp4 = W_add( W_mult0_32_32( sec_z_real_fx, sec_z_real_fx ), W_mult0_32_32( sec_z_imag_fx, sec_z_imag_fx ) ); // 2 * (Qfac - 2) @@ -1322,14 +1457,56 @@ void calculate_hodirac_sector_parameters_fx( sec_sum64 = W_shl( sec_sum64, n ); // 2 * (Qfac - 2) + n sec_sum = W_extract_h( sec_sum64 ); // 2 * (Qfac - 2) + n - 32 = 2 * Qfac + n - 36 sec_sum_exp = sub( 67, add( shl( Qfac, 1 ), n ) ); - // divide by 2: tmp1 = L_shr( tmp1, 1 ); sec_sum = L_shr( sec_sum, 1 ); - tmp5 = BASOP_Util_Add_Mant32Exp( tmp1, tmp_exp, sec_sum, sec_sum_exp, &tmp_exp ); energy_fx = BASOP_Util_Add_Mant32Exp( energy_fx, energy_exp, tmp5, tmp_exp, &energy_exp ); +#endif + } +#ifdef OPT_SBA_ENC_V2_NBE + tmp_scale = sub( W_norm( energy_64_fx ), 32 ); + energy_fx = W_shl_sat_l( energy_64_fx, tmp_scale ); + energy_exp = sub( tmp_exp_1, tmp_scale ); + if ( energy_fx == 0 ) + { + energy_exp = 0; + move16(); + } + + tmp_scale = sub( W_norm( sec_I_vec_x_64_fx ), 32 ); + *p_sec_I_vec_x_fx = W_shl_sat_l( sec_I_vec_x_64_fx, tmp_scale ); + move32(); + *p_sec_I_vec_x_exp = sub( tmp_exp_2, tmp_scale ); + move16(); + if ( *p_sec_I_vec_x_fx == 0 ) + { + *p_sec_I_vec_x_exp = 0; + move16(); + } + + tmp_scale = sub( W_norm( sec_I_vec_y_64_fx ), 32 ); + *p_sec_I_vec_y_fx = W_shl_sat_l( sec_I_vec_y_64_fx, tmp_scale ); + move32(); + *p_sec_I_vec_y_exp = sub( tmp_exp_2, tmp_scale ); + move16(); + if ( *p_sec_I_vec_y_fx == 0 ) + { + *p_sec_I_vec_y_exp = 0; + move16(); } + + tmp_scale = sub( W_norm( sec_I_vec_z_64_fx ), 32 ); + *p_sec_I_vec_z_fx = W_shl_sat_l( sec_I_vec_z_64_fx, tmp_scale ); + move32(); + *p_sec_I_vec_z_exp = sub( tmp_exp_2, tmp_scale ); + move16(); + if ( *p_sec_I_vec_z_fx == 0 ) + { + *p_sec_I_vec_z_exp = 0; + move16(); + } +#endif } IF( hDirAC->firstrun_sector_params ) @@ -1354,7 +1531,62 @@ void calculate_hodirac_sector_parameters_fx( ELSE { Word32 w_fx = L_sub( ONE_IN_Q30, beta_fx ); // Q30 +#ifdef OPT_SBA_ENC_V2_NBE + Word32 tmp_1, tmp_2, tmp_3, tmp_sec_1, tmp_sec_2, tmp_sec_3; + Word16 e_x, e_y, e_z; + move16(); + + e_x = s_max( *p_sec_I_vec_x_exp, *p_sec_I_vec_smth_x_exp ); + e_y = s_max( *p_sec_I_vec_y_exp, *p_sec_I_vec_smth_y_exp ); + e_z = s_max( *p_sec_I_vec_z_exp, *p_sec_I_vec_smth_z_exp ); + + tmp_1 = L_shr( *p_sec_I_vec_x_fx, sub( e_x, *p_sec_I_vec_x_exp ) ); // e_x + tmp_2 = L_shr( *p_sec_I_vec_y_fx, sub( e_y, *p_sec_I_vec_y_exp ) ); // e_y + tmp_3 = L_shr( *p_sec_I_vec_z_fx, sub( e_z, *p_sec_I_vec_z_exp ) ); // e_z + tmp_sec_1 = L_shr( *p_sec_I_vec_smth_x_fx, sub( e_x, *p_sec_I_vec_smth_x_exp ) ); // e_x + tmp_sec_2 = L_shr( *p_sec_I_vec_smth_y_fx, sub( e_y, *p_sec_I_vec_smth_y_exp ) ); // e_y + tmp_sec_3 = L_shr( *p_sec_I_vec_smth_z_fx, sub( e_z, *p_sec_I_vec_smth_z_exp ) ); // e_z + + + temp_x64 = W_mac_32_32( W_mult_32_32( w_fx, tmp_1 ), beta_fx, tmp_sec_1 ); // 31-e_x+30+1=62-e_x + temp_y64 = W_mac_32_32( W_mult_32_32( w_fx, tmp_2 ), beta_fx, tmp_sec_2 ); // 31-e_y+30+1=62-e_y + temp_z64 = W_mac_32_32( W_mult_32_32( w_fx, tmp_3 ), beta_fx, tmp_sec_3 ); // 31-e_z+30+1=62-e_z + + tmp_scale = sub( W_norm( temp_x64 ), 32 ); + *p_sec_I_vec_smth_x_fx = W_shl_sat_l( temp_x64, tmp_scale ); + move32(); + *p_sec_I_vec_smth_x_exp = sub( sub( e_x, 31 ), tmp_scale ); // 31-(62-e_x+tmp_scale)=e_x-tmp_scale-31 + move16(); + if ( *p_sec_I_vec_smth_x_fx == 0 ) + { + *p_sec_I_vec_smth_x_exp = 0; + move16(); + } + + tmp_scale = sub( W_norm( temp_y64 ), 32 ); + *p_sec_I_vec_smth_y_fx = W_shl_sat_l( temp_y64, tmp_scale ); + move32(); + *p_sec_I_vec_smth_y_exp = sub( sub( e_y, 31 ), tmp_scale ); // 31-(62-e_z+tmp_scale)=e_x-tmp_scale-31 + move16(); + if ( *p_sec_I_vec_smth_y_fx == 0 ) + { + *p_sec_I_vec_smth_y_exp = 0; + move16(); + } + tmp_scale = sub( W_norm( temp_z64 ), 32 ); + *p_sec_I_vec_smth_z_fx = W_shl_sat_l( temp_z64, tmp_scale ); + move32(); + *p_sec_I_vec_smth_z_exp = sub( sub( e_z, 31 ), tmp_scale ); // 31-(62-e_z+tmp_scale)=e_x-tmp_scale-31 + move16(); + if ( *p_sec_I_vec_smth_z_fx == 0 ) + { + *p_sec_I_vec_smth_z_exp = 0; + move16(); + } + + *p_energy_smth_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( w_fx, energy_fx ), add( energy_exp, 1 ), Mpy_32_32( beta_fx, *p_energy_smth_fx ), add( *p_energy_smth_exp, 1 ), p_energy_smth_exp ); +#else *p_sec_I_vec_smth_x_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( w_fx, *p_sec_I_vec_x_fx ), add( *p_sec_I_vec_x_exp, 1 ), Mpy_32_32( beta_fx, *p_sec_I_vec_smth_x_fx ), add( *p_sec_I_vec_smth_x_exp, 1 ), p_sec_I_vec_smth_x_exp ); move32(); *p_sec_I_vec_smth_y_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( w_fx, *p_sec_I_vec_y_fx ), add( *p_sec_I_vec_y_exp, 1 ), Mpy_32_32( beta_fx, *p_sec_I_vec_smth_y_fx ), add( *p_sec_I_vec_smth_y_exp, 1 ), p_sec_I_vec_smth_y_exp ); @@ -1362,6 +1594,8 @@ void calculate_hodirac_sector_parameters_fx( *p_sec_I_vec_smth_z_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( w_fx, *p_sec_I_vec_z_fx ), add( *p_sec_I_vec_z_exp, 1 ), Mpy_32_32( beta_fx, *p_sec_I_vec_smth_z_fx ), add( *p_sec_I_vec_smth_z_exp, 1 ), p_sec_I_vec_smth_z_exp ); move32(); *p_energy_smth_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( w_fx, energy_fx ), add( energy_exp, 1 ), Mpy_32_32( beta_fx, *p_energy_smth_fx ), add( *p_energy_smth_exp, 1 ), p_energy_smth_exp ); +#endif + move32(); } IF( LT_32( energy_fx, EPSILON_FX_SMALL ) ) @@ -1404,7 +1638,7 @@ void calculate_hodirac_sector_parameters_fx( *p_ene_exp = *p_energy_smth_exp; move16(); - tmp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( normI_fx, L_add( *p_energy_smth_fx, EPSILON_FX_SMALL ), &tmp_e ) ); + tmp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( normI_fx, L_add_sat( *p_energy_smth_fx, EPSILON_FX_SMALL ), &tmp_e ) ); tmp_e = add( tmp_e, sub( normI_exp, *p_energy_smth_exp ) ); tmp32 = BASOP_Util_Add_Mant32Exp( ONE_IN_Q30, 1, L_negate( tmp32 ), tmp_e, &tmp_e ); *p_diff_fx = tmp32; @@ -1438,6 +1672,18 @@ void calculate_hodirac_sector_parameters_fx( } ELSE { +#ifdef OPT_SBA_ENC_V2_NBE + tmp32_1 = L_sub( ONE_IN_Q29, tmp_diff_fx ); + tmp32_2 = L_sub( tmp_diff_fx, ONE_IN_Q29 / 2 ); + + // *p_azi = 2.f * (1.f - tmp_diff) * *p_azi + (2.f * tmp_diff - 1.f) * *p_azi_prev + *p_azi_fx = L_shl( Madd_32_32( Mpy_32_32( tmp32_1, *p_azi_fx ), tmp32_2, *p_azi_prev_fx ), 3 ); // Q29 + Q23 - 31 + 2 = Q23 + move32(); + + // *p_ele = 2.f * (1.f - tmp_diff) * *p_ele + (2.f * tmp_diff - 1.f) * *p_ele_prev + *p_ele_fx = L_shl( Madd_32_32( Mpy_32_32( tmp32_1, *p_ele_fx ), tmp32_2, *p_ele_prev_fx ), 3 ); // Q29 + Q23 - 31 + 2 = Q23 + move32(); +#else *p_azi_fx = L_shl( L_add( Mpy_32_32( L_sub( ONE_IN_Q29, tmp_diff_fx ), *p_azi_fx ), Mpy_32_32( L_sub( tmp_diff_fx, ONE_IN_Q29 / 2 ), *p_azi_prev_fx ) ), 1 ); // Q29 + Q23 - 31 = Q21 move32(); *p_ele_fx = L_shl( L_add( Mpy_32_32( L_sub( ONE_IN_Q29, tmp_diff_fx ), *p_ele_fx ), Mpy_32_32( L_sub( tmp_diff_fx, ONE_IN_Q29 / 2 ), *p_ele_prev_fx ) ), 1 ); // Q29 + Q23 - 31 = Q21 @@ -1446,6 +1692,7 @@ void calculate_hodirac_sector_parameters_fx( move32(); *p_ele_fx = L_shl( *p_ele_fx, 2 ); // Q21 -> Q23; move32(); +#endif } } ELSE diff --git a/lib_com/options.h b/lib_com/options.h index 5f33170bfe838b11fa37b3ddc9e83939a52e41b4..bd73d566e9ea464725afb3522bb78bc1f1ccda69 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -82,6 +82,7 @@ #define OPT_HEAD_ROT_REND_V1_BE #define OPT_SBA_DEC_V2_BE #define OPT_SBA_ENC_V2_BE +#define OPT_SBA_ENC_V2_NBE #define OPT_SBA_ENC_V1_BE #define OPT_SBA_DEC_PATH /* Optimization made in SBA decoding path */ #define OPT_IVAS_FILTER_ROM /* Optimization made in IVAS filter table */