Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -75,6 +75,7 @@ #define FIX_1379_MASA_ANGLE_ROUND /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ #define OPT_SBA_REND_V1_BE #define OPT_SBA_ENC_V1_BE #define OPT_BIN_RENDERER_V1 #define OPT_BIN_RENDERER_V2 Loading lib_rend/ivas_dirac_dec_binaural_functions_fx.c +165 −0 Original line number Diff line number Diff line Loading @@ -3415,6 +3415,169 @@ static void eig2x2_fx( move16(); move16(); #ifdef OPT_SBA_REND_V1_BE /* Eigenvectors */ FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { Word16 q_diff = sub( q_e, *q_D ); IF( q_diff > 0 ) { tmp1 = L_sub( D_fx[ch], L_shr( e1, q_diff ) ); tmp2 = L_sub( D_fx[ch], L_shr( e2, q_diff ) ); q_tmp1 = *q_D; move16(); } ELSE { tmp1 = L_sub( L_shl( D_fx[ch], q_diff ), e1 ); tmp2 = L_sub( L_shl( D_fx[ch], q_diff ), e2 ); q_tmp1 = q_e; move16(); } IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { s_fx = tmp2; move32(); exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) { c_re = L_shr( c_re, q_diff ); c_im = L_shr( c_im, q_diff ); q_c = q_tmp1; move16(); } ELSE { s_fx = L_shl( s_fx, q_diff ); q_tmp1 = q_c; move16(); } Ure_fx[0][ch] = Mpy_32_32( s_fx, normVal_fx ); move32(); Ure_fx[1][ch] = Mpy_32_32( c_re, normVal_fx ); move32(); Uim_fx[1][ch] = Mpy_32_32( c_im, normVal_fx ); move32(); q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 ); IF( q_U_2 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); IF( q_diff > 0 ) { Ure_fx[1][ch - 1] = L_shr( Ure_fx[1][ch - 1], q_diff ); Ure_fx[0][ch - 1] = L_shr( Ure_fx[0][ch - 1], q_diff ); Uim_fx[0][ch - 1] = L_shr( Uim_fx[0][ch - 1], q_diff ); q_U_2 = q_U_1; move32(); move32(); move32(); move16(); } ELSE IF( GT_16( q_U_1, q_U_2 ) ) { Ure_fx[1][ch] = L_shl( Ure_fx[1][ch], q_diff ); Ure_fx[0][ch] = L_shl( Ure_fx[0][ch], q_diff ); Uim_fx[1][ch] = L_shl( Uim_fx[1][ch], q_diff ); q_U_1 = q_U_2; move32(); move32(); move32(); move16(); } } q_U_2 = q_U_1; move16(); } ELSE { s_fx = tmp1; move32(); exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) { c_re = L_shr( c_re, q_diff ); c_im = L_shr( c_im, q_diff ); q_c = q_tmp1; move16(); } ELSE { s_fx = L_shl( s_fx, q_diff ); q_tmp1 = q_c; move16(); } Ure_fx[1][ch] = Mpy_32_32( s_fx, normVal_fx ); move32(); Ure_fx[0][ch] = Mpy_32_32( c_re, normVal_fx ); move32(); Uim_fx[0][ch] = Mpy_32_32( L_negate( c_im ), normVal_fx ); move32(); q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 ); IF( q_U_1 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); IF( q_diff > 0 ) { Ure_fx[1][ch] = L_shr( Ure_fx[1][ch], q_diff ); Ure_fx[0][ch] = L_shr( Ure_fx[0][ch], q_diff ); Uim_fx[0][ch] = L_shr( Uim_fx[0][ch], q_diff ); q_U_2 = q_U_1; move32(); move32(); move32(); move16(); } ELSE IF( GT_16( q_U_1, q_U_2 ) ) { Ure_fx[1][ch - 1] = L_shl( Ure_fx[1][ch - 1], q_diff ); Ure_fx[0][ch - 1] = L_shl( Ure_fx[0][ch - 1], q_diff ); Uim_fx[1][ch - 1] = L_shl( Uim_fx[1][ch - 1], q_diff ); q_U_1 = q_U_2; move32(); move32(); move32(); move16(); } } q_U_1 = q_U_2; move16(); } } #else /* OPT_SBA_REND_V1_BE */ /* Eigenvectors */ FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { Loading Loading @@ -3571,6 +3734,8 @@ static void eig2x2_fx( move16(); } } #endif /* OPT_SBA_REND_V1_BE */ if ( q_U_1 != 0 ) { *q_U = q_U_1; Loading lib_rend/ivas_dirac_output_synthesis_dec_fx.c +61 −2 Original line number Diff line number Diff line Loading @@ -2465,6 +2465,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( move16(); move16(); #ifdef OPT_SBA_REND_V1_BE Word32 cmp = W_shl_sat_l( DIRAC_GAIN_LIMIT_Q26, sub( h_dirac_output_synthesis_state->gains_dir_prev_q, 26 ) ); Word32 cmp2 = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ); #endif /* OPT_SBA_REND_V1_BE */ FOR( k = 0; k < nchan_out_woLFE; k++ ) { Word32 power_smooth_temp; Loading Loading @@ -2512,11 +2517,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_dir ) = 0; move32(); } #ifdef OPT_SBA_REND_V1_BE ELSE IF( GT_32( *( p_gains_dir ), cmp ) ) { *( p_gains_dir ) = cmp; /*26 + h_dirac_output_synthesis_state->gains_dir_prev_q + 1 + 5 - 32 -> h_dirac_output_synthesis_state->gains_dir_prev_q*/ move32(); } #else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_dir ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ) ) ) { *( p_gains_dir ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ); /*26 + h_dirac_output_synthesis_state->gains_dir_prev_q + 1 + 5 - 32 -> h_dirac_output_synthesis_state->gains_dir_prev_q*/ move32(); } #endif /* OPT_SBA_REND_V1_BE */ IF( *( p_cy_cross_dir_smooth_prev++ ) < 0 ) { Loading Loading @@ -2549,11 +2562,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_diff ) = 0; move32(); } #ifdef OPT_SBA_REND_V1_BE ELSE IF( GT_32( *( p_gains_diff ), cmp2 ) ) /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ { *( p_gains_diff ) = cmp2; /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ move32(); } #else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_diff ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ) ) ) /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ { *( p_gains_diff ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ); /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ move32(); } #endif /* OPT_SBA_REND_V1_BE */ p_gains_diff++; } Loading @@ -2564,15 +2585,25 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( g1 = alpha[l]; // Q31 move32(); g2 = L_sub( ONE_IN_Q31, g1 ); // Q31 #ifdef OPT_SBA_REND_V1_BE W_temp = W_mac_32_32( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ), g2, ( *( p_cy_auto_dir_smooth_prev ) ) ); /*32+q_cy_auto_dir_smooth_prev_local*/ #else /* OPT_SBA_REND_V1_BE */ W_temp = W_add( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ), W_mult_32_32( g2, ( *( p_cy_auto_dir_smooth_prev ) ) ) ); /*32+q_cy_auto_dir_smooth_prev_local*/ #endif /* OPT_SBA_REND_V1_BE */ q_tmp = W_norm( W_temp ); L_tmp = W_extract_h( W_shl( W_temp, q_tmp ) ); // q_cy_auto_dir_smooth_prev_local + q_tmp *( p_cy_auto_dir_smooth_prev++ ) = L_shr_r( L_tmp, q_tmp ); // q_cy_auto_dir_smooth_prev_local move32(); #ifdef OPT_SBA_REND_V1_BE *( p_cy_cross_dir_smooth_prev ) = Madd_32_32( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth ) ) ), g2, ( *( p_cy_cross_dir_smooth_prev ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev #else /* OPT_SBA_REND_V1_BE */ *( p_cy_cross_dir_smooth_prev ) = L_add( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth ) ) ), Mpy_32_32( g2, ( *( p_cy_cross_dir_smooth_prev ) ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev #endif /* OPT_SBA_REND_V1_BE */ move32(); test(); if ( *( p_cy_cross_dir_smooth_prev ) == 0 && ( *( p_cy_cross_dir_smooth ) != 0 ) ) Loading Loading @@ -2604,11 +2635,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_dir ) = 0; move32(); } #ifdef OPT_SBA_REND_V1_BE ELSE IF( GT_32( *( p_gains_dir ), cmp ) ) /*gains_dir_prev_q*/ { *( p_gains_dir ) = cmp; /*gains_dir_prev_q*/ move32(); } #else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_dir ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ) ) ) /*gains_dir_prev_q*/ { *( p_gains_dir ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ); /*gains_dir_prev_q*/ move32(); } #endif /* OPT_SBA_REND_V1_BE */ IF( *( p_cy_cross_dir_smooth_prev++ ) < 0 ) { Loading Loading @@ -2695,7 +2734,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( shl( i_mult( proto_direct_index[k], num_freq_bands ), Q1 ); FOR( l = 0; l < num_freq_bands; l++ ) { #ifdef OPT_SBA_REND_V1_BE g = Madd_32_32( Mpy_32_32( g1, *( p_gain_1++ ) ), g2, *( p_gain_2++ ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q #else /* OPT_SBA_REND_V1_BE */ g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q #endif /* OPT_SBA_REND_V1_BE */ Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_mult0_32_32( g, ( *( p_power_smooth++ ) ) ); // (gains_dir_prev_q, q_proto_direct_buffer) -> gains_dir_prev_q + q_proto_direct_buffer move64(); Loading @@ -2717,7 +2760,12 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( { FOR( l = 0; l < h_dirac_output_synthesis_params->max_band_decorr; l++ ) { #ifdef OPT_SBA_REND_V1_BE g = Madd_32_32( Mpy_32_32( g1, *( p_gain_1++ ) ), g2, *( p_gain_2++ ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q #else /* OPT_SBA_REND_V1_BE */ g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q #endif /* OPT_SBA_REND_V1_BE */ Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_add( Cldfb_RealBuffer64_fx[k][buf_idx][l], W_shr( W_mult0_32_32( g, ( *( p_power_smooth_diff++ ) ) ), negate( q_align ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) -> gains_diff_prev_q + q_proto_direct_buffer move64(); Loading Loading @@ -2766,16 +2814,27 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( } } q_align = W_norm( W_temp ); #ifdef OPT_SBA_REND_V1_BE Word16 shift = sub( q_align, 32 ); #endif /* OPT_SBA_REND_V1_BE */ FOR( buf_idx = 0; buf_idx < nbslots; ++buf_idx ) { FOR( k = 0; k < nchan_out_woLFE; k++ ) { FOR( l = 0; l < num_freq_bands; l++ ) { #ifdef OPT_SBA_REND_V1_BE RealBuffer[k][buf_idx][l] = W_shl_sat_l( Cldfb_RealBuffer64_fx[k][buf_idx][l], shift ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); ImagBuffer[k][buf_idx][l] = W_shl_sat_l( Cldfb_ImagBuffer64_fx[k][buf_idx][l], shift ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); #else /* OPT_SBA_REND_V1_BE */ RealBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_RealBuffer64_fx[k][buf_idx][l], q_align ) ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); ImagBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_ImagBuffer64_fx[k][buf_idx][l], q_align ) ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); #endif /* OPT_SBA_REND_V1_BE */ } } } Loading lib_rend/ivas_dirac_rend_fx.c +6 −2 Original line number Diff line number Diff line Loading @@ -3155,7 +3155,11 @@ void protoSignalComputation4_fx( sq_tmp_fx = Madd_32_32( Mpy_32_32( proto_frame_f_fx[idx], proto_frame_f_fx[idx] ), proto_frame_f_fx[idx + 1], proto_frame_f_fx[idx + 1] ); // 2*(proto_frame_f_q)-31 sq_tmp_q = sub( add( *proto_frame_f_q, *proto_frame_f_q ), 31 ); #ifdef OPT_SBA_REND_V1_BE proto_power_smooth_fx_q = s_min( *proto_power_smooth_q, sq_tmp_q ); proto_power_smooth_fx[l + ( k * num_freq_bands )] = L_add( L_shr( proto_power_smooth_fx[l + ( k * num_freq_bands )], sub( *proto_power_smooth_q, proto_power_smooth_fx_q ) ), L_shr( sq_tmp_fx, sub( sq_tmp_q, proto_power_smooth_fx_q ) ) ); // proto_power_smooth_fx_q #else /* OPT_SBA_REND_V1_BE */ IF( LT_16( *proto_power_smooth_q, sq_tmp_q ) ) { proto_power_smooth_fx[l + ( k * num_freq_bands )] = L_add( proto_power_smooth_fx[l + ( k * num_freq_bands )], L_shr( sq_tmp_fx, sub( sq_tmp_q, *proto_power_smooth_q ) ) ); // proto_power_smooth_q Loading @@ -3170,7 +3174,7 @@ void protoSignalComputation4_fx( proto_power_smooth_fx_q = sq_tmp_q; move16(); } #endif /* OPT_SBA_REND_V1_BE */ p_proto_buffer_fx[idx] = proto_frame_f_fx[idx]; // proto_frame_f_q move32(); p_proto_buffer_fx[idx + 1] = proto_frame_f_fx[idx + 1]; // proto_frame_f_q Loading lib_rend/ivas_efap_fx.c +10 −6 Original line number Diff line number Diff line Loading @@ -2513,7 +2513,11 @@ static Word16 in_tri_fx( #endif /* VEC_ARITH_OPT_v1 */ /* Verification of the non-colinearity : Q22 * Q22 = Q13 */ #ifdef OPT_SBA_REND_V1_BE invFactor = Msub_32_32( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), tmpDot1[1], tmpDot2[0] ); /*q22+q22-q31->q13*/ #else /* OPT_SBA_REND_V1_BE */ invFactor = L_sub( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), Mpy_32_32( tmpDot1[1], tmpDot2[0] ) ); /*q22+q22-q31->q13*/ #endif /* OPT_SBA_REND_V1_BE */ IF( invFactor == 0 ) { Loading Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -75,6 +75,7 @@ #define FIX_1379_MASA_ANGLE_ROUND /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ #define OPT_SBA_REND_V1_BE #define OPT_SBA_ENC_V1_BE #define OPT_BIN_RENDERER_V1 #define OPT_BIN_RENDERER_V2 Loading
lib_rend/ivas_dirac_dec_binaural_functions_fx.c +165 −0 Original line number Diff line number Diff line Loading @@ -3415,6 +3415,169 @@ static void eig2x2_fx( move16(); move16(); #ifdef OPT_SBA_REND_V1_BE /* Eigenvectors */ FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { Word16 q_diff = sub( q_e, *q_D ); IF( q_diff > 0 ) { tmp1 = L_sub( D_fx[ch], L_shr( e1, q_diff ) ); tmp2 = L_sub( D_fx[ch], L_shr( e2, q_diff ) ); q_tmp1 = *q_D; move16(); } ELSE { tmp1 = L_sub( L_shl( D_fx[ch], q_diff ), e1 ); tmp2 = L_sub( L_shl( D_fx[ch], q_diff ), e2 ); q_tmp1 = q_e; move16(); } IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) ) { s_fx = tmp2; move32(); exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) { c_re = L_shr( c_re, q_diff ); c_im = L_shr( c_im, q_diff ); q_c = q_tmp1; move16(); } ELSE { s_fx = L_shl( s_fx, q_diff ); q_tmp1 = q_c; move16(); } Ure_fx[0][ch] = Mpy_32_32( s_fx, normVal_fx ); move32(); Ure_fx[1][ch] = Mpy_32_32( c_re, normVal_fx ); move32(); Uim_fx[1][ch] = Mpy_32_32( c_im, normVal_fx ); move32(); q_U_1 = sub( add( q_tmp1, q_tmp2 ), 31 ); IF( q_U_2 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); IF( q_diff > 0 ) { Ure_fx[1][ch - 1] = L_shr( Ure_fx[1][ch - 1], q_diff ); Ure_fx[0][ch - 1] = L_shr( Ure_fx[0][ch - 1], q_diff ); Uim_fx[0][ch - 1] = L_shr( Uim_fx[0][ch - 1], q_diff ); q_U_2 = q_U_1; move32(); move32(); move32(); move16(); } ELSE IF( GT_16( q_U_1, q_U_2 ) ) { Ure_fx[1][ch] = L_shl( Ure_fx[1][ch], q_diff ); Ure_fx[0][ch] = L_shl( Ure_fx[0][ch], q_diff ); Uim_fx[1][ch] = L_shl( Uim_fx[1][ch], q_diff ); q_U_1 = q_U_2; move32(); move32(); move32(); move16(); } } q_U_2 = q_U_1; move16(); } ELSE { s_fx = tmp1; move32(); exp = sub( norm_l( s_fx ), 1 ); tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); tmp2 = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2 q_tmp2 = sub( 31, exp ); q_diff = sub( q_c, q_tmp1 ); IF( q_diff > 0 ) { c_re = L_shr( c_re, q_diff ); c_im = L_shr( c_im, q_diff ); q_c = q_tmp1; move16(); } ELSE { s_fx = L_shl( s_fx, q_diff ); q_tmp1 = q_c; move16(); } Ure_fx[1][ch] = Mpy_32_32( s_fx, normVal_fx ); move32(); Ure_fx[0][ch] = Mpy_32_32( c_re, normVal_fx ); move32(); Uim_fx[0][ch] = Mpy_32_32( L_negate( c_im ), normVal_fx ); move32(); q_U_2 = sub( add( q_tmp1, q_tmp2 ), 31 ); IF( q_U_1 != 0 ) { q_diff = sub( q_U_2, q_U_1 ); IF( q_diff > 0 ) { Ure_fx[1][ch] = L_shr( Ure_fx[1][ch], q_diff ); Ure_fx[0][ch] = L_shr( Ure_fx[0][ch], q_diff ); Uim_fx[0][ch] = L_shr( Uim_fx[0][ch], q_diff ); q_U_2 = q_U_1; move32(); move32(); move32(); move16(); } ELSE IF( GT_16( q_U_1, q_U_2 ) ) { Ure_fx[1][ch - 1] = L_shl( Ure_fx[1][ch - 1], q_diff ); Ure_fx[0][ch - 1] = L_shl( Ure_fx[0][ch - 1], q_diff ); Uim_fx[1][ch - 1] = L_shl( Uim_fx[1][ch - 1], q_diff ); q_U_1 = q_U_2; move32(); move32(); move32(); move16(); } } q_U_1 = q_U_2; move16(); } } #else /* OPT_SBA_REND_V1_BE */ /* Eigenvectors */ FOR( ch = 0; ch < BINAURAL_CHANNELS; ch++ ) { Loading Loading @@ -3571,6 +3734,8 @@ static void eig2x2_fx( move16(); } } #endif /* OPT_SBA_REND_V1_BE */ if ( q_U_1 != 0 ) { *q_U = q_U_1; Loading
lib_rend/ivas_dirac_output_synthesis_dec_fx.c +61 −2 Original line number Diff line number Diff line Loading @@ -2465,6 +2465,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( move16(); move16(); #ifdef OPT_SBA_REND_V1_BE Word32 cmp = W_shl_sat_l( DIRAC_GAIN_LIMIT_Q26, sub( h_dirac_output_synthesis_state->gains_dir_prev_q, 26 ) ); Word32 cmp2 = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ); #endif /* OPT_SBA_REND_V1_BE */ FOR( k = 0; k < nchan_out_woLFE; k++ ) { Word32 power_smooth_temp; Loading Loading @@ -2512,11 +2517,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_dir ) = 0; move32(); } #ifdef OPT_SBA_REND_V1_BE ELSE IF( GT_32( *( p_gains_dir ), cmp ) ) { *( p_gains_dir ) = cmp; /*26 + h_dirac_output_synthesis_state->gains_dir_prev_q + 1 + 5 - 32 -> h_dirac_output_synthesis_state->gains_dir_prev_q*/ move32(); } #else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_dir ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ) ) ) { *( p_gains_dir ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ); /*26 + h_dirac_output_synthesis_state->gains_dir_prev_q + 1 + 5 - 32 -> h_dirac_output_synthesis_state->gains_dir_prev_q*/ move32(); } #endif /* OPT_SBA_REND_V1_BE */ IF( *( p_cy_cross_dir_smooth_prev++ ) < 0 ) { Loading Loading @@ -2549,11 +2562,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_diff ) = 0; move32(); } #ifdef OPT_SBA_REND_V1_BE ELSE IF( GT_32( *( p_gains_diff ), cmp2 ) ) /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ { *( p_gains_diff ) = cmp2; /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ move32(); } #else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_diff ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ) ) ) /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ { *( p_gains_diff ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_diff_prev_q ) ), Q5 ) ); /*h_dirac_output_synthesis_state->gains_diff_prev_q*/ move32(); } #endif /* OPT_SBA_REND_V1_BE */ p_gains_diff++; } Loading @@ -2564,15 +2585,25 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( g1 = alpha[l]; // Q31 move32(); g2 = L_sub( ONE_IN_Q31, g1 ); // Q31 #ifdef OPT_SBA_REND_V1_BE W_temp = W_mac_32_32( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ), g2, ( *( p_cy_auto_dir_smooth_prev ) ) ); /*32+q_cy_auto_dir_smooth_prev_local*/ #else /* OPT_SBA_REND_V1_BE */ W_temp = W_add( W_mult_32_32( g1, ( *( p_cy_auto_dir_smooth++ ) ) ), W_mult_32_32( g2, ( *( p_cy_auto_dir_smooth_prev ) ) ) ); /*32+q_cy_auto_dir_smooth_prev_local*/ #endif /* OPT_SBA_REND_V1_BE */ q_tmp = W_norm( W_temp ); L_tmp = W_extract_h( W_shl( W_temp, q_tmp ) ); // q_cy_auto_dir_smooth_prev_local + q_tmp *( p_cy_auto_dir_smooth_prev++ ) = L_shr_r( L_tmp, q_tmp ); // q_cy_auto_dir_smooth_prev_local move32(); #ifdef OPT_SBA_REND_V1_BE *( p_cy_cross_dir_smooth_prev ) = Madd_32_32( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth ) ) ), g2, ( *( p_cy_cross_dir_smooth_prev ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev #else /* OPT_SBA_REND_V1_BE */ *( p_cy_cross_dir_smooth_prev ) = L_add( Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth ) ) ), Mpy_32_32( g2, ( *( p_cy_cross_dir_smooth_prev ) ) ) ); // (Q31, q_cy_cross_dir_smooth_prev) -> q_cy_cross_dir_smooth_prev #endif /* OPT_SBA_REND_V1_BE */ move32(); test(); if ( *( p_cy_cross_dir_smooth_prev ) == 0 && ( *( p_cy_cross_dir_smooth ) != 0 ) ) Loading Loading @@ -2604,11 +2635,19 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( *( p_gains_dir ) = 0; move32(); } #ifdef OPT_SBA_REND_V1_BE ELSE IF( GT_32( *( p_gains_dir ), cmp ) ) /*gains_dir_prev_q*/ { *( p_gains_dir ) = cmp; /*gains_dir_prev_q*/ move32(); } #else /* OPT_SBA_REND_V1_BE */ ELSE IF( GT_32( *( p_gains_dir ), W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ) ) ) /*gains_dir_prev_q*/ { *( p_gains_dir ) = W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state->gains_dir_prev_q ) ), Q5 ) ); /*gains_dir_prev_q*/ move32(); } #endif /* OPT_SBA_REND_V1_BE */ IF( *( p_cy_cross_dir_smooth_prev++ ) < 0 ) { Loading Loading @@ -2695,7 +2734,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( shl( i_mult( proto_direct_index[k], num_freq_bands ), Q1 ); FOR( l = 0; l < num_freq_bands; l++ ) { #ifdef OPT_SBA_REND_V1_BE g = Madd_32_32( Mpy_32_32( g1, *( p_gain_1++ ) ), g2, *( p_gain_2++ ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q #else /* OPT_SBA_REND_V1_BE */ g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_dir_prev_q) -> gains_dir_prev_q #endif /* OPT_SBA_REND_V1_BE */ Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_mult0_32_32( g, ( *( p_power_smooth++ ) ) ); // (gains_dir_prev_q, q_proto_direct_buffer) -> gains_dir_prev_q + q_proto_direct_buffer move64(); Loading @@ -2717,7 +2760,12 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( { FOR( l = 0; l < h_dirac_output_synthesis_params->max_band_decorr; l++ ) { #ifdef OPT_SBA_REND_V1_BE g = Madd_32_32( Mpy_32_32( g1, *( p_gain_1++ ) ), g2, *( p_gain_2++ ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q #else /* OPT_SBA_REND_V1_BE */ g = L_add( Mpy_32_32( g1, *( p_gain_1++ ) ), Mpy_32_32( g2, *( p_gain_2++ ) ) ); // (Q31, gains_diff_prev_q) -> gains_diff_prev_q #endif /* OPT_SBA_REND_V1_BE */ Cldfb_RealBuffer64_fx[k][buf_idx][l] = W_add( Cldfb_RealBuffer64_fx[k][buf_idx][l], W_shr( W_mult0_32_32( g, ( *( p_power_smooth_diff++ ) ) ), negate( q_align ) ) ); // (gains_diff_prev_q, q_proto_direct_buffer) -> gains_diff_prev_q + q_proto_direct_buffer move64(); Loading Loading @@ -2766,16 +2814,27 @@ void ivas_dirac_dec_output_synthesis_process_subframe_psd_ls_fx( } } q_align = W_norm( W_temp ); #ifdef OPT_SBA_REND_V1_BE Word16 shift = sub( q_align, 32 ); #endif /* OPT_SBA_REND_V1_BE */ FOR( buf_idx = 0; buf_idx < nbslots; ++buf_idx ) { FOR( k = 0; k < nchan_out_woLFE; k++ ) { FOR( l = 0; l < num_freq_bands; l++ ) { #ifdef OPT_SBA_REND_V1_BE RealBuffer[k][buf_idx][l] = W_shl_sat_l( Cldfb_RealBuffer64_fx[k][buf_idx][l], shift ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); ImagBuffer[k][buf_idx][l] = W_shl_sat_l( Cldfb_ImagBuffer64_fx[k][buf_idx][l], shift ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); #else /* OPT_SBA_REND_V1_BE */ RealBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_RealBuffer64_fx[k][buf_idx][l], q_align ) ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); ImagBuffer[k][buf_idx][l] = W_extract_h( W_shl( Cldfb_ImagBuffer64_fx[k][buf_idx][l], q_align ) ); /*( ( ( h_dirac_output_synthesis_state->proto_direct_buffer_f_q+h_dirac_output_synthesis_state->gains_dir_prev_q )+ q_align )- 32 )*/ move32(); #endif /* OPT_SBA_REND_V1_BE */ } } } Loading
lib_rend/ivas_dirac_rend_fx.c +6 −2 Original line number Diff line number Diff line Loading @@ -3155,7 +3155,11 @@ void protoSignalComputation4_fx( sq_tmp_fx = Madd_32_32( Mpy_32_32( proto_frame_f_fx[idx], proto_frame_f_fx[idx] ), proto_frame_f_fx[idx + 1], proto_frame_f_fx[idx + 1] ); // 2*(proto_frame_f_q)-31 sq_tmp_q = sub( add( *proto_frame_f_q, *proto_frame_f_q ), 31 ); #ifdef OPT_SBA_REND_V1_BE proto_power_smooth_fx_q = s_min( *proto_power_smooth_q, sq_tmp_q ); proto_power_smooth_fx[l + ( k * num_freq_bands )] = L_add( L_shr( proto_power_smooth_fx[l + ( k * num_freq_bands )], sub( *proto_power_smooth_q, proto_power_smooth_fx_q ) ), L_shr( sq_tmp_fx, sub( sq_tmp_q, proto_power_smooth_fx_q ) ) ); // proto_power_smooth_fx_q #else /* OPT_SBA_REND_V1_BE */ IF( LT_16( *proto_power_smooth_q, sq_tmp_q ) ) { proto_power_smooth_fx[l + ( k * num_freq_bands )] = L_add( proto_power_smooth_fx[l + ( k * num_freq_bands )], L_shr( sq_tmp_fx, sub( sq_tmp_q, *proto_power_smooth_q ) ) ); // proto_power_smooth_q Loading @@ -3170,7 +3174,7 @@ void protoSignalComputation4_fx( proto_power_smooth_fx_q = sq_tmp_q; move16(); } #endif /* OPT_SBA_REND_V1_BE */ p_proto_buffer_fx[idx] = proto_frame_f_fx[idx]; // proto_frame_f_q move32(); p_proto_buffer_fx[idx + 1] = proto_frame_f_fx[idx + 1]; // proto_frame_f_q Loading
lib_rend/ivas_efap_fx.c +10 −6 Original line number Diff line number Diff line Loading @@ -2513,7 +2513,11 @@ static Word16 in_tri_fx( #endif /* VEC_ARITH_OPT_v1 */ /* Verification of the non-colinearity : Q22 * Q22 = Q13 */ #ifdef OPT_SBA_REND_V1_BE invFactor = Msub_32_32( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), tmpDot1[1], tmpDot2[0] ); /*q22+q22-q31->q13*/ #else /* OPT_SBA_REND_V1_BE */ invFactor = L_sub( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), Mpy_32_32( tmpDot1[1], tmpDot2[0] ) ); /*q22+q22-q31->q13*/ #endif /* OPT_SBA_REND_V1_BE */ IF( invFactor == 0 ) { Loading