diff --git a/lib_com/options.h b/lib_com/options.h index 73f5a409e4bf5d7d3315e951bd455e9442826e64..32c53988aa4942a91bd1aac6f5a72f183a24cbad 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -75,6 +75,9 @@ #define FIX_1379_MASA_ANGLE_ROUND /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ +#define OPT_SBA_REND_V1_BE +#define OPT_HEAD_ROT_REND_V1_BE +#define OPT_SBA_DEC_V2_BE #define OPT_SBA_ENC_V1_BE #define OPT_BIN_RENDERER_V1 #define OPT_BIN_RENDERER_V2 diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index b9084df8821060f9ee23f849fca3115bb82047dd..8e8a2b34368a34a551c8940955044ae3d9813674 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2634,9 +2634,16 @@ void IMDCT_ivas_fx( Word32 fac; // fac = shl_sat( mult_r( extract_h( L_shr_sat( hTcxDec->conceal_eof_gain32, sub( 1, hTcxDec->conceal_eof_gain_e ) ) ), st->last_concealed_gain_syn_deemph ), 1 ); fac = Mpy_32_16_1( hTcxDec->conceal_eof_gain32, st->last_concealed_gain_syn_deemph ); // q = 31 - hTcxDec->conceal_eof_gain_e - last_concealed_gain_syn_deemph_e +#ifdef OPT_SBA_DEC_V2_BE + Word16 eff_e = add( hTcxDec->conceal_eof_gain_e, st->last_concealed_gain_syn_deemph_e ); +#endif /* OPT_SBA_DEC_V2_BE */ FOR( Word16 ind = 0; ind < overlap; ind++ ) { +#ifdef OPT_SBA_DEC_V2_BE + old_syn_overl_fx[ind] = extract_h( L_shl_sat( Mpy_32_16_1( fac, old_syn_overl_fx[ind] ), eff_e ) ); // Q(-2) +#else /* OPT_SBA_DEC_V2_BE */ old_syn_overl_fx[ind] = extract_h( L_shl_sat( Mpy_32_16_1( fac, old_syn_overl_fx[ind] ), add( hTcxDec->conceal_eof_gain_e, st->last_concealed_gain_syn_deemph_e ) ) ); // Q(-2) +#endif /* OPT_SBA_DEC_V2_BE */ move16(); } } @@ -4713,9 +4720,16 @@ void decoder_tcx_noiseshaping_igf_fx( { /* If the exponent on the spec side (i>L_frame) is lesser, then shift all the values in the spec side by the difference to make both sides have the same exponent. */ +#ifdef OPT_SBA_DEC_V2_BE + Word16 diff_e = sub( frame_side_x_e, spec_side_x_e ); +#endif /* OPT_SBA_DEC_V2_BE */ FOR( i = L_frame; i < L_spec; i++ ) { +#ifdef OPT_SBA_DEC_V2_BE + x_fx[i] = L_shr( x_fx[i], diff_e ); +#else /* OPT_SBA_DEC_V2_BE */ x_fx[i] = L_shr( x_fx[i], sub( frame_side_x_e, spec_side_x_e ) ); +#endif /* OPT_SBA_DEC_V2_BE */ move32(); } } @@ -4723,9 +4737,16 @@ void decoder_tcx_noiseshaping_igf_fx( { /* If the exponent on the spec side (i>L_frame) is greater, then shift all the values in the frame side (ihInputSetup->nchan_out_woLFE; chIdx++ ) { FOR( k = 0; k < numTimeSlots; k++ ) @@ -2105,6 +2106,22 @@ void ivas_binRenderer_fx( } } } + +#ifdef OPT_SBA_DEC_V2_BE + Word16 len = sub( CLDFB_NO_CHANNELS_MAX, hBinRenderer->conv_band ); + + FOR( pos_idx = 0; pos_idx < num_poses; pos_idx++ ) + { + FOR( k = 0; k < numTimeSlots; k++ ) + { + set32_fx( &Cldfb_RealBuffer_Binaural_fx[pos_idx][0][k][hBinRenderer->conv_band], 0, len ); + set32_fx( &Cldfb_RealBuffer_Binaural_fx[pos_idx][1][k][hBinRenderer->conv_band], 0, len ); + set32_fx( &Cldfb_ImagBuffer_Binaural_fx[pos_idx][0][k][hBinRenderer->conv_band], 0, len ); + set32_fx( &Cldfb_ImagBuffer_Binaural_fx[pos_idx][1][k][hBinRenderer->conv_band], 0, len ); + } + } +#endif /* OPT_SBA_DEC_V2_BE */ + pop_wmops(); return; } diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index caddb5c5cc9fe94936879def3f490ad0ae42ed6b..598f9082945fd0eb2385355d5460e71e4611cd79 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -605,8 +605,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE { Word16 shifter; - +#ifdef OPT_SBA_DEC_V2_BE + shifter = sub( mixing_matrix_res_smooth_e, 31 ); +#else /* OPT_SBA_DEC_V2_BE */ shifter = 31 - mixing_matrix_res_smooth_e; +#endif /* OPT_SBA_DEC_V2_BE */ FOR( ch_idx = 0; ch_idx < nY; ch_idx++ ) { int i; @@ -625,8 +628,13 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) ); idx += nY; } +#ifdef OPT_SBA_DEC_V2_BE + Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter ); + Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter ); +#else /* OPT_SBA_DEC_V2_BE */ Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) ); Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) ); +#endif /* OPT_SBA_DEC_V2_BE */ } } diff --git a/lib_dec/ivas_spar_decoder_fx.c b/lib_dec/ivas_spar_decoder_fx.c index 7f12c592ed95f8864c686557a7c4b35679fb8ac0..da7e1c0994c859259348f36badfa86e553dc157a 100644 --- a/lib_dec/ivas_spar_decoder_fx.c +++ b/lib_dec/ivas_spar_decoder_fx.c @@ -1163,6 +1163,69 @@ void ivas_spar_get_parameters_fx( move16(); Word16 add_weight_fx = sub( MAX_WORD16, weight_fx ); Word16 add_weight_20ms_fx = sub( MAX_WORD16, weight_20ms_fx ); +#ifdef OPT_SBA_DEC_V2_BE + Word16 out_flag[IVAS_MAX_FB_MIXER_OUT_CH]; + + Word32 band_bool = LT_16( split_band, IVAS_MAX_NUM_BANDS ); + + FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ ) + { + /* 20ms cross-fade for Transport channels in all frequency bands */ + /* sub-frame processing for missing channels in all frequency bands*/ + out_flag[out_ch] = band_bool && ( 0 == ivas_is_res_channel( out_ch, hSpar->hMdDec->spar_md_cfg.nchan_transport ) ); + move16(); + } + Word32 frame_bool = GT_16( hSpar->i_subframe, 3 ); + + FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ ) + { + IF( out_flag[out_ch] ) + { + IF( frame_bool ) + { + FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) + { + FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) + { + par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ), + hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], add_weight_fx ); + move32(); + } + } + } + ELSE + { + + + FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) + { + FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) + { + { + par_mat_fx[out_ch][in_ch][spar_band] = hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band]; /*hSpar->hMdDec->Q_mixer_mat*/ + move32(); + } + } + } + } + } + ELSE + { + FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) + { + FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) + { + /* 20ms Transport channel reconstruction with matching encoder/decoder processing */ + Word16 prev_idx = SPAR_DIRAC_SPLIT_START_BAND < IVAS_MAX_NUM_BANDS ? 1 : 0; /* if SPAR_DIRAC_SPLIT_START_BAND == IVAS_MAX_NUM_BANDS, then the sub-frame mixer_mat delay line is not active */ + move16(); + par_mat_fx[out_ch][in_ch][spar_band] = Madd_32_16( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], add_weight_20ms_fx ), + hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx ); /*hSpar->hMdDec->Q_mixer_mat*/ + move32(); + } + } + } + } +#else /* OPT_SBA_DEC_V2_BE */ FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) { FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ ) @@ -1202,7 +1265,7 @@ void ivas_spar_get_parameters_fx( } } } - +#endif /* OPT_SBA_DEC_V2_BE */ return; } diff --git a/lib_dec/ivas_spar_md_dec_fx.c b/lib_dec/ivas_spar_md_dec_fx.c index 7f487a75c272279cbc0ab168f8116743069db676..36b77732ae7e3eb7e66d842147d5a9830d7bae39 100644 --- a/lib_dec/ivas_spar_md_dec_fx.c +++ b/lib_dec/ivas_spar_md_dec_fx.c @@ -1318,6 +1318,14 @@ static void ivas_get_spar_matrices_fx( tmp_C2_re_fx[0][j] = Mpy_32_32( active_w_dm_fac_fx, L_negate( hMdDec->spar_md.band_coeffs[( b + ( i_ts * IVAS_MAX_NUM_BANDS ) )].pred_re_fx[j - 1] ) ); // Q31 *Q22=Q22 move32(); } +#ifdef OPT_SBA_DEC_V2_BE + re_fx1 = Madd_32_32( ONE_IN_Q13, tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q13+Q13 + + re_fx1 = Madd_32_32( re_fx1, tmp_C2_re_fx[0][2], tmp_C1_re_fx[2][0] ); // Q13+Q13 + + tmp_dm_re_fx[0][0] = L_shl( Madd_32_32( re_fx1, tmp_C2_re_fx[0][3], tmp_C1_re_fx[3][0] ), Q9 ); // (Q13+Q13) << Q9 = Q22; + // +#else /* OPT_SBA_DEC_V2_BE */ re_fx = Mpy_32_32( tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q22 *Q22 =Q13 re_fx1 = L_add( ONE_IN_Q13, re_fx ); // Q13+Q13 @@ -1326,6 +1334,7 @@ static void ivas_get_spar_matrices_fx( re_fx = Mpy_32_32( tmp_C2_re_fx[0][3], tmp_C1_re_fx[3][0] ); // Q22 *Q22 =Q13 tmp_dm_re_fx[0][0] = L_shl( L_add( re_fx1, re_fx ), Q9 ); // (Q13+Q13) << Q9 = Q22; +#endif /* OPT_SBA_DEC_V2_BE */ move32(); IF( EQ_16( dyn_active_w_flag, 1 ) ) @@ -1401,7 +1410,11 @@ static void ivas_get_spar_matrices_fx( { FOR( k = dmx_ch; k < numch_out; k++ ) { +#ifndef OPT_SBA_DEC_V2_BE IF( EQ_16( sub( j, dmx_ch ), sub( k, dmx_ch ) ) ) +#else /* OPT_SBA_DEC_V2_BE */ + IF( EQ_16( j, k ) ) +#endif /* OPT_SBA_DEC_V2_BE */ { tmpP_re_fx[j][k] = hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].P_re_fx[sub( k, dmx_ch )]; // Q22 move32(); diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c index 0a0d15786fc5e597051697da07150a351543e03f..7a27cf339dc2e03552bd858ac6696c3dce732415 100644 --- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c +++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c @@ -3758,6 +3758,169 @@ static void eig2x2_fx( move16(); move16(); +#ifdef OPT_SBA_REND_V1_BE + /* Eigenvectors */ + FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) + { + Word16 q_diff = sub( q_e, *q_D ); + IF( q_diff > 0 ) + { + tmp1 = L_sub( D_fx[ch], L_shr( e1, q_diff ) ); + tmp2 = L_sub( D_fx[ch], L_shr( e2, q_diff ) ); + q_tmp1 = *q_D; + move16(); + } + ELSE + { + tmp1 = L_sub( L_shl( D_fx[ch], q_diff ), e1 ); + tmp2 = L_sub( L_shl( D_fx[ch], q_diff ), e2 ); + q_tmp1 = q_e; + move16(); + } + + IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) + { + s_fx = tmp2; + move32(); + exp = sub( norm_l( s_fx ), 1 ); + tmp2 = Mpy_32_32( s_fx, s_fx ); + q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); + + tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); + q_tmp2 = sub( 31, q_tmp2 ); + + tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); + + tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); + exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); + normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 + q_tmp2 = sub( 31, exp ); + + q_diff = sub( q_c, q_tmp1 ); + IF( q_diff > 0 ) + { + c_re = L_shr( c_re, q_diff ); + c_im = L_shr( c_im, q_diff ); + q_c = q_tmp1; + move16(); + } + ELSE + { + s_fx = L_shl( s_fx, q_diff ); + q_tmp1 = q_c; + move16(); + } + + Ure_fx[0][ch] = Mpy_32_32( s_fx, normVal_fx ); + move32(); + Ure_fx[1][ch] = Mpy_32_32( c_re, normVal_fx ); + move32(); + Uim_fx[1][ch] = Mpy_32_32( c_im, normVal_fx ); + move32(); + q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 ); + + IF( q_U_2 != 0 ) + { + q_diff = sub( q_U_2, q_U_1 ); + IF( q_diff > 0 ) + { + Ure_fx[1][ch - 1] = L_shr( Ure_fx[1][ch - 1], q_diff ); + Ure_fx[0][ch - 1] = L_shr( Ure_fx[0][ch - 1], q_diff ); + Uim_fx[0][ch - 1] = L_shr( Uim_fx[0][ch - 1], q_diff ); + q_U_2 = q_U_1; + move32(); + move32(); + move32(); + move16(); + } + ELSE IF( GT_16( q_U_1, q_U_2 ) ) + { + Ure_fx[1][ch] = L_shl( Ure_fx[1][ch], q_diff ); + Ure_fx[0][ch] = L_shl( Ure_fx[0][ch], q_diff ); + Uim_fx[1][ch] = L_shl( Uim_fx[1][ch], q_diff ); + q_U_1 = q_U_2; + move32(); + move32(); + move32(); + move16(); + } + } + q_U_2 = q_U_1; + move16(); + } + ELSE + { + s_fx = tmp1; + move32(); + + exp = sub( norm_l( s_fx ), 1 ); + tmp2 = Mpy_32_32( s_fx, s_fx ); + q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); + + tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); + q_tmp2 = sub( 31, q_tmp2 ); + + tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); + + tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); + exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); + normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 + q_tmp2 = sub( 31, exp ); + + q_diff = sub( q_c, q_tmp1 ); + IF( q_diff > 0 ) + { + c_re = L_shr( c_re, q_diff ); + c_im = L_shr( c_im, q_diff ); + q_c = q_tmp1; + move16(); + } + ELSE + { + s_fx = L_shl( s_fx, q_diff ); + q_tmp1 = q_c; + move16(); + } + + Ure_fx[1][ch] = Mpy_32_32( s_fx, normVal_fx ); + move32(); + Ure_fx[0][ch] = Mpy_32_32( c_re, normVal_fx ); + move32(); + Uim_fx[0][ch] = Mpy_32_32( L_negate( c_im ), normVal_fx ); + move32(); + q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 ); + + IF( q_U_1 != 0 ) + { + q_diff = sub( q_U_2, q_U_1 ); + IF( q_diff > 0 ) + { + Ure_fx[1][ch] = L_shr( Ure_fx[1][ch], q_diff ); + Ure_fx[0][ch] = L_shr( Ure_fx[0][ch], q_diff ); + Uim_fx[0][ch] = L_shr( Uim_fx[0][ch], q_diff ); + q_U_2 = q_U_1; + move32(); + move32(); + move32(); + move16(); + } + ELSE IF( GT_16( q_U_1, q_U_2 ) ) + { + Ure_fx[1][ch - 1] = L_shl( Ure_fx[1][ch - 1], q_diff ); + Ure_fx[0][ch - 1] = L_shl( Ure_fx[0][ch - 1], q_diff ); + Uim_fx[1][ch - 1] = L_shl( Uim_fx[1][ch - 1], q_diff ); + q_U_1 = q_U_2; + move32(); + move32(); + move32(); + move16(); + } + } + q_U_1 = q_U_2; + move16(); + } + } +#else /* OPT_SBA_REND_V1_BE */ /* Eigenvectors */ FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { @@ -3914,6 +4077,8 @@ static void eig2x2_fx( move16(); } } +#endif /* OPT_SBA_REND_V1_BE */ + if ( q_U_1 != 0 ) { *q_U = q_U_1; diff --git a/lib_rend/ivas_dirac_output_synthesis_dec_fx.c b/lib_rend/ivas_dirac_output_synthesis_dec_fx.c index 78e32bc3c69b54026af5bbe9640a6dd03f67057b..87fa8b7d03f1a86713b618b3f862e76e6388880f 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec_fx.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec_fx.c @@ -2459,6 +2459,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( move16(); move16(); +#ifdef OPT_SBA_REND_V1_BE + Word32 cmp = W_shl_sat_l( DIRAC_GAIN_LIMIT_Q26, sub( h_dirac_output_synthesis_state->gains_dir_prev_q, 26 ) ); + Word32 cmp2 = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ); +#endif /* OPT_SBA_REND_V1_BE */ + FOR( k = 0; k < nchan_out_woLFE; k++ ) { Word32 power_smooth_temp; @@ -2506,11 +2511,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_dir ) = 0; move32(); } +#ifdef OPT_SBA_REND_V1_BE + ELSE IF( GT_32( *( p_gains_dir ), cmp ) ) + { + *( p_gains_dir ) = cmp; /*26 + h_dirac_output_synthesis_state->gains_dir_prev_q + 1 + 5 - 32 -> h_dirac_output_synthesis_state->gains_dir_prev_q*/ + move32(); + } +#else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_dir ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ) ) ) { *( p_gains_dir ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ); /*26 + h_dirac_output_synthesis_state->gains_dir_prev_q + 1 + 5 - 32 -> h_dirac_output_synthesis_state->gains_dir_prev_q*/ move32(); } +#endif /* OPT_SBA_REND_V1_BE */ IF( *( p_cy_cross_dir_smooth_prev++ ) < 0 ) { @@ -2543,11 +2556,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_diff ) = 0; move32(); } +#ifdef OPT_SBA_REND_V1_BE + ELSE IF( GT_32( *( p_gains_diff ), cmp2 ) ) /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ + { + *( p_gains_diff ) = cmp2; /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ + move32(); + } +#else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_diff ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ) ) ) /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ { *( p_gains_diff ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ); /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ move32(); } +#endif /* OPT_SBA_REND_V1_BE */ p_gains_diff++; } @@ -2558,15 +2579,25 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( g1 = alpha[l]; // Q31 move32(); g2 = L_sub( ONE_IN_Q31, g1 ); // Q31 +#ifdef OPT_SBA_REND_V1_BE + W_temp = W_mac_32_32( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ), + g2, ( *( p_cy_auto_dir_smooth_prev ) ) ); /*32+q_cy_auto_dir_smooth_prev_local*/ +#else /* OPT_SBA_REND_V1_BE */ W_temp = W_add( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ), W_mult_32_32( g2, ( *( p_cy_auto_dir_smooth_prev ) ) ) ); /*32+q_cy_auto_dir_smooth_prev_local*/ +#endif /* OPT_SBA_REND_V1_BE */ q_tmp = W_norm( W_temp ); L_tmp = W_extract_h( W_shl( W_temp, q_tmp ) ); // q_cy_auto_dir_smooth_prev_local + q_tmp *( p_cy_auto_dir_smooth_prev++ ) = L_shr_r( L_tmp, q_tmp ); // q_cy_auto_dir_smooth_prev_local move32(); +#ifdef OPT_SBA_REND_V1_BE + *( p_cy_cross_dir_smooth_prev ) = Madd_32_32( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth ) ) ), + g2, ( *( p_cy_cross_dir_smooth_prev ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev +#else /* OPT_SBA_REND_V1_BE */ *( p_cy_cross_dir_smooth_prev ) = L_add( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth ) ) ), Mpy_32_32( g2, ( *( p_cy_cross_dir_smooth_prev ) ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev +#endif /* OPT_SBA_REND_V1_BE */ move32(); test(); if ( *( p_cy_cross_dir_smooth_prev ) == 0 && ( *( p_cy_cross_dir_smooth ) != 0 ) ) @@ -2598,11 +2629,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_dir ) = 0; move32(); } +#ifdef OPT_SBA_REND_V1_BE + ELSE IF( GT_32( *( p_gains_dir ), cmp ) ) /*gains_dir_prev_q*/ + { + *( p_gains_dir ) = cmp; /*gains_dir_prev_q*/ + move32(); + } +#else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_dir ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ) ) ) /*gains_dir_prev_q*/ { *( p_gains_dir ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ); /*gains_dir_prev_q*/ move32(); } +#endif /* OPT_SBA_REND_V1_BE */ IF( *( p_cy_cross_dir_smooth_prev++ ) < 0 ) { @@ -2689,7 +2728,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( shl( i_mult( proto_direct_index[k], num_freq_bands ), Q1 ); FOR( l = 0; l < num_freq_bands; l++ ) { - g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q +#ifdef OPT_SBA_REND_V1_BE + g = Madd_32_32( Mpy_32_32( g1, *( p_gain_1++ ) ), g2, *( p_gain_2++ ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q +#else /* OPT_SBA_REND_V1_BE */ + g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q +#endif /* OPT_SBA_REND_V1_BE */ Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_mult0_32_32( g, ( *( p_power_smooth++ ) ) ); // (gains_dir_prev_q, q_proto_direct_buffer) -> gains_dir_prev_q + q_proto_direct_buffer move64(); @@ -2711,7 +2754,12 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( { FOR( l = 0; l < h_dirac_output_synthesis_params->max_band_decorr; l++ ) { - g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q +#ifdef OPT_SBA_REND_V1_BE + g = Madd_32_32( Mpy_32_32( g1, *( p_gain_1++ ) ), g2, *( p_gain_2++ ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q +#else /* OPT_SBA_REND_V1_BE */ + g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q + +#endif /* OPT_SBA_REND_V1_BE */ Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_add( Cldfb_RealBuffer64_fx[k][buf_idx][l], W_shr( W_mult0_32_32( g, ( *( p_power_smooth_diff++ ) ) ), negate( q_align ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) -> gains_diff_prev_q + q_proto_direct_buffer move64(); @@ -2760,16 +2808,27 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( } } q_align = W_norm( W_temp ); +#ifdef OPT_SBA_REND_V1_BE + Word16 shift = sub( q_align, 32 ); +#endif /* OPT_SBA_REND_V1_BE */ + FOR( buf_idx = 0; buf_idx < nbslots; ++buf_idx ) { FOR( k = 0; k < nchan_out_woLFE; k++ ) { FOR( l = 0; l < num_freq_bands; l++ ) { +#ifdef OPT_SBA_REND_V1_BE + RealBuffer[k][buf_idx][l] = W_shl_sat_l( Cldfb_RealBuffer64_fx[k][buf_idx][l], shift ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ + move32(); + ImagBuffer[k][buf_idx][l] = W_shl_sat_l( Cldfb_ImagBuffer64_fx[k][buf_idx][l], shift ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ + move32(); +#else /* OPT_SBA_REND_V1_BE */ RealBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_RealBuffer64_fx[k][buf_idx][l], q_align ) ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); ImagBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_ImagBuffer64_fx[k][buf_idx][l], q_align ) ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); +#endif /* OPT_SBA_REND_V1_BE */ } } } diff --git a/lib_rend/ivas_dirac_rend_fx.c b/lib_rend/ivas_dirac_rend_fx.c index ad804a9309a53a973736763d70735cde316160d4..fb92eab13840d74c3d69ba1d783bb39a5f8c582a 100644 --- a/lib_rend/ivas_dirac_rend_fx.c +++ b/lib_rend/ivas_dirac_rend_fx.c @@ -3155,7 +3155,11 @@ void protoSignalComputation4_fx( sq_tmp_fx = Madd_32_32( Mpy_32_32( proto_frame_f_fx[idx], proto_frame_f_fx[idx] ), proto_frame_f_fx[idx + 1], proto_frame_f_fx[idx + 1] ); // 2*(proto_frame_f_q)-31 sq_tmp_q = sub( add( *proto_frame_f_q, *proto_frame_f_q ), 31 ); +#ifdef OPT_SBA_REND_V1_BE + proto_power_smooth_fx_q = s_min( *proto_power_smooth_q, sq_tmp_q ); + proto_power_smooth_fx[l + ( k * num_freq_bands )] = L_add( L_shr( proto_power_smooth_fx[l + ( k * num_freq_bands )], sub( *proto_power_smooth_q, proto_power_smooth_fx_q ) ), L_shr( sq_tmp_fx, sub( sq_tmp_q, proto_power_smooth_fx_q ) ) ); // proto_power_smooth_fx_q +#else /* OPT_SBA_REND_V1_BE */ IF( LT_16( *proto_power_smooth_q, sq_tmp_q ) ) { proto_power_smooth_fx[l + ( k * num_freq_bands )] = L_add( proto_power_smooth_fx[l + ( k * num_freq_bands )], L_shr( sq_tmp_fx, sub( sq_tmp_q, *proto_power_smooth_q ) ) ); // proto_power_smooth_q @@ -3170,8 +3174,8 @@ void protoSignalComputation4_fx( proto_power_smooth_fx_q = sq_tmp_q; move16(); } - - p_proto_buffer_fx[idx] = proto_frame_f_fx[idx]; // proto_frame_f_q +#endif /* OPT_SBA_REND_V1_BE */ + p_proto_buffer_fx[idx] = proto_frame_f_fx[idx]; // proto_frame_f_q move32(); p_proto_buffer_fx[idx + 1] = proto_frame_f_fx[idx + 1]; // proto_frame_f_q move32(); diff --git a/lib_rend/ivas_efap_fx.c b/lib_rend/ivas_efap_fx.c index fbcfdfe77cbc4cfe42131768b1c48e0667820f51..2ac397e2cb3bd0853b6531a7b914e5d045ee1b31 100644 --- a/lib_rend/ivas_efap_fx.c +++ b/lib_rend/ivas_efap_fx.c @@ -1528,7 +1528,7 @@ static void get_poly_gains_fx( #ifdef VEC_ARITH_OPT_v1 v_sub_fixed_no_hdrm( P, A, P_minus_A, 2 ); /* Precalculate value of (P-A) q22*/ #else /* VEC_ARITH_OPT_v1 */ - v_sub_fixed( P, A, P_minus_A, 2, 0 ); /* Precalculate value of (P-A) q22*/ + v_sub_fixed( P, A, P_minus_A, 2, 0 ); /* Precalculate value of (P-A) q22*/ #endif /* VEC_ARITH_OPT_v1 */ FOR( j = i; j < numChan - 2 + i; ++j ) @@ -1585,7 +1585,7 @@ static Word32 get_tri_gain_fx( #ifdef VEC_ARITH_OPT_v1 v_sub_fixed_no_hdrm( B, A, tmpSub1, 2 ); // tmpSub1 q22 #else /* VEC_ARITH_OPT_v1 */ - v_sub_fixed( B, A, tmpSub1, 2, 0 ); // tmpSub1 q22 + v_sub_fixed( B, A, tmpSub1, 2, 0 ); // tmpSub1 q22 #endif /* VEC_ARITH_OPT_v1 */ tmpDot1 = dotp_fixed( tmpN, tmpSub1, 2 ); // Q13 @@ -2248,7 +2248,7 @@ static void sort_channels_vertex_fx( #ifdef VEC_ARITH_OPT_v1 v_sub_fixed_no_hdrm( tmpV1, tmpV2, tmpV3, 3 ); // tmpV3 Q30 #else /* VEC_ARITH_OPT_v1 */ - v_sub_fixed( tmpV1, tmpV2, tmpV3, 3, 0 ); // tmpV3 Q30 + v_sub_fixed( tmpV1, tmpV2, tmpV3, 3, 0 ); // tmpV3 Q30 #endif /* VEC_ARITH_OPT_v1 */ Word16 exp2 = 2; move16(); @@ -2434,7 +2434,7 @@ static Word16 in_poly_fx( /* Angles are in Q22 */ #ifdef VEC_ARITH_OPT_v1 v_sub_fixed_no_hdrm( P, A, P_minus_A, 2 ); /* Precalculate value of (P-A) q22*/ #else /* VEC_ARITH_OPT_v1 */ - v_sub_fixed( P, A, P_minus_A, 2, 0 ); /* Precalculate value of (P-A) q22*/ + v_sub_fixed( P, A, P_minus_A, 2, 0 ); /* Precalculate value of (P-A) q22*/ #endif /* VEC_ARITH_OPT_v1 */ FOR( n = 1; n < sub( numVertices, 1 ); ++n ) @@ -2508,12 +2508,16 @@ static Word16 in_tri_fx( v_sub_fixed_no_hdrm( B, A, tmpDot1, 2 ); // tmpDot1 q22 v_sub_fixed_no_hdrm( C, A, tmpDot2, 2 ); // tmpDot2 q22 #else /* VEC_ARITH_OPT_v1 */ - v_sub_fixed( B, A, tmpDot1, 2, 0 ); // tmpDot1 q22 - v_sub_fixed( C, A, tmpDot2, 2, 0 ); // tmpDot2 q22 + v_sub_fixed( B, A, tmpDot1, 2, 0 ); // tmpDot1 q22 + v_sub_fixed( C, A, tmpDot2, 2, 0 ); // tmpDot2 q22 #endif /* VEC_ARITH_OPT_v1 */ /* Verification of the non-colinearity : Q22 * Q22 = Q13 */ +#ifdef OPT_SBA_REND_V1_BE + invFactor = Msub_32_32( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), tmpDot1[1], tmpDot2[0] ); /*q22+q22-q31->q13*/ +#else /* OPT_SBA_REND_V1_BE */ invFactor = L_sub( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), Mpy_32_32( tmpDot1[1], tmpDot2[0] ) ); /*q22+q22-q31->q13*/ +#endif /* OPT_SBA_REND_V1_BE */ IF( invFactor == 0 ) { diff --git a/lib_rend/ivas_rotation_fx.c b/lib_rend/ivas_rotation_fx.c index 5073b7418bd7d987fb91786a9b25c5f3e7fcc3ab..92ad8d36f4f55496349855e354687c3a25a0a9b3 100644 --- a/lib_rend/ivas_rotation_fx.c +++ b/lib_rend/ivas_rotation_fx.c @@ -1010,7 +1010,9 @@ void rotateFrame_shd_cldfb( Word16 l = 0, m1 = 0, m2 = 0; Word32 realRot[2 * HEADROT_ORDER + 1], imagRot[2 * HEADROT_ORDER + 1]; Word16 SHrotmat[HEADROT_SHMAT_DIM][HEADROT_SHMAT_DIM]; +#ifndef OPT_HEAD_ROT_REND_V1_BE Word32 temp1, temp2; +#endif /* OPT_HEAD_ROT_REND_V1_BE */ move16(); move16(); move16(); @@ -1059,12 +1061,19 @@ void rotateFrame_shd_cldfb( move32(); FOR( m = m1; m < m2; m++ ) { +#ifdef OPT_HEAD_ROT_REND_V1_BE + realRot[n - m1] = Madd_32_16_r( realRot[n - m1], Cldfb_RealBuffer[m][i][iBand], SHrotmat[n][m] ); // Q(x + 14 - 15) + move32(); + imagRot[n - m1] = Madd_32_16_r( imagRot[n - m1], Cldfb_ImagBuffer[m][i][iBand], SHrotmat[n][m] ); // Q(x + 14 - 15) + move32(); +#else /* OPT_HEAD_ROT_REND_V1_BE */ temp1 = Mpy_32_16_r( Cldfb_RealBuffer[m][i][iBand], SHrotmat[n][m] ); // Q(x + 14 - 15) temp2 = Mpy_32_16_r( Cldfb_ImagBuffer[m][i][iBand], SHrotmat[n][m] ); // Q(x + 14 - 15) realRot[n - m1] = L_add( temp1, realRot[n - m1] ); // Q(x + 14 - 15) move32(); imagRot[n - m1] = L_add( temp2, imagRot[n - m1] ); // Q(x + 14 - 15) move32(); +#endif /* OPT_HEAD_ROT_REND_V1_BE */ } } /* write back the result */ diff --git a/lib_rend/ivas_vbap_fx.c b/lib_rend/ivas_vbap_fx.c index 7495953e3e462f5eca9be008502a81e7be8a6656..cfcbc67603388e210e713d9b2f2ea406795f9867 100644 --- a/lib_rend/ivas_vbap_fx.c +++ b/lib_rend/ivas_vbap_fx.c @@ -578,7 +578,11 @@ void vbap_determine_gains_fx( move32(); FOR( ch = 0; ch < 3; ch++ ) { +#ifdef OPT_SBA_REND_V1_BE + gain_ene_fx = Madd_32_32( gain_ene_fx, gain_triplet_fx[ch], gain_triplet_fx[ch] ); /* Q(2 * VBAP_VS_TRIPLET.q_inverse_matrix - 31) */ +#else /* OPT_SBA_REND_V1_BE */ gain_ene_fx = L_add( gain_ene_fx, Mpy_32_32( gain_triplet_fx[ch], gain_triplet_fx[ch] ) ); /* Q(2 * VBAP_VS_TRIPLET.q_inverse_matrix - 31) */ +#endif /* OPT_SBA_REND_V1_BE */ } norm_value_fx = Isqrt( L_shr( gain_ene_fx, 1 ) ); /* Q(31 - (2 * VBAP_VS_TRIPLET.q_inverse_matrix - 31 - 1) / 2 ) = Q(47 - VBAP_VS_TRIPLET.q_inverse_matrix) */ @@ -681,9 +685,35 @@ static UWord8 vector_matrix_multiply_3x3_fx( Word32 *result, /* o : output vector Q(q_matrix) */ Word16 q_matrix ) { - result[0] = Mpy_32_16_1( matrix[0][0], src_vector[0] ); /* Q(q_matrix) */ - result[0] = L_add( result[0], Mpy_32_16_1( matrix[1][0], src_vector[1] ) ); /* Q(q_matrix) */ - result[0] = L_add( result[0], Mpy_32_16_1( matrix[2][0], src_vector[2] ) ); /* Q(q_matrix) */ +#ifdef OPT_SBA_REND_V1_BE + Word32 pointzero_one = Mpy_32_16_1( L_lshl( 1, q_matrix ), -327 /* -0.01 in Q15 */ ); + result[0] = Madd_32_16( Madd_32_16( Mpy_32_16_1( matrix[0][0], src_vector[0] ), matrix[1][0], src_vector[1] ), matrix[2][0], src_vector[2] ); /* Q(q_matrix) */ + move32(); + + IF( LT_32( result[0], pointzero_one ) ) + { + return 0; + } + + result[1] = Madd_32_16( Madd_32_16( Mpy_32_16_1( matrix[0][1], src_vector[0] ), matrix[1][1], src_vector[1] ), matrix[2][1], src_vector[2] ); /* Q(q_matrix) */ + move32(); + + IF( LT_32( result[1], pointzero_one ) ) + { + return 0; + } + + result[2] = Madd_32_16( Madd_32_16( Mpy_32_16_1( matrix[0][2], src_vector[0] ), matrix[1][2], src_vector[1] ), matrix[2][2], src_vector[2] ); /* Q(q_matrix) */ + move32(); + + IF( LT_32( result[2], pointzero_one ) ) + { + return 0; + } +#else /* OPT_SBA_REND_V1_BE */ + result[0] = Mpy_32_16_1( matrix[0][0], src_vector[0] ); /* Q(q_matrix) */ + result[0] = L_add( result[0], Mpy_32_16_1( matrix[1][0], src_vector[1] ) ); /* Q(q_matrix) */ + result[0] = L_add( result[0], Mpy_32_16_1( matrix[2][0], src_vector[2] ) ); /* Q(q_matrix) */ move32(); move32(); move32(); @@ -716,7 +746,7 @@ static UWord8 vector_matrix_multiply_3x3_fx( { return 0; } - +#endif /* OPT_SBA_REND_V1_BE */ return 1; }