Loading lib_dec/dec_tcx.c +23 −12 Original line number Diff line number Diff line Loading @@ -237,6 +237,7 @@ void decoder_tcx_imdct_fx( Word16 q_a_itf = 15; Word16 x_e = sub( 31, q_x ); move16(); Word16 shift_q = sub( q_x, q_win ); /*-----------------------------------------------------------------* * Initializations Loading Loading @@ -364,9 +365,10 @@ void decoder_tcx_imdct_fx( IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) ) { Word16 copy_len = s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); set32_fx( x_tmp_fx, 0, L_FRAME_PLUS ); Copy32( x_fx, x_tmp_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) ); // q_x Copy32( x_fx, xn_bufFB_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) ); // q_x Copy32( x_fx, x_tmp_fx, copy_len ); // q_x Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x } ELSE IF( ( st->element_mode == EVS_MONO ) ) { Loading @@ -374,8 +376,9 @@ void decoder_tcx_imdct_fx( } ELSE { Copy32( x_fx, x_tmp_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x Copy32( x_fx, xn_bufFB_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x Word16 copy_len = s_max( L_spec, s_max( L_frame, L_frameTCX ) ); Copy32( x_fx, x_tmp_fx, copy_len ); // q_x Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x } IF( ( st->igf != 0 ) ) Loading Loading @@ -416,24 +419,29 @@ void decoder_tcx_imdct_fx( FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ ) { xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], sub( q_x, q_win ) ) ); // q_x xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], shift_q ) ); // q_x move16(); } Word16 ratio_e; Word16 ratio = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &ratio_e ); // Q = 15-ratio_e. * FSCALE_DENOM is (1 << 9) ratio = shr( ratio, sub( 6, ratio_e ) ); IF( st->element_mode != EVS_MONO ) { IMDCT_ivas_fx( x_tmp_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, max( L_frameTCX, L_spec ) >> 1, L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win ); kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); } ELSE { IMDCT_ivas_fx( x_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win ); kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); } FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ ) { xn_bufFB_fx[ind] = L_shl( xn_bufFB_fx_16[ind], sub( q_x, q_win ) ); // Q_x xn_bufFB_fx[ind] = L_shl( L_deposit_l( xn_bufFB_fx_16[ind] ), shift_q ); // Q_x } IF( ( bfi == 0 ) ) Loading @@ -453,19 +461,22 @@ void decoder_tcx_imdct_fx( IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) ) { res_m = BASOP_Util_Divide1616_Scale( L_frame_glob, L_FRAME, &res_e ); st->old_fpitch = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e ); // Using sat as a single instruction shifts and extracts st->old_fpitch = W_shl_sat_l( W_mult0_32_32( st->old_fpitch, L_frame_glob ), -8 ); // Divide by 256 ==> SHR by 8 move32(); } IF( GT_16( st->element_mode, EVS_MONO ) ) { res_m = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &res_e ); st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e ); move32(); } ELSE { res_m = BASOP_Util_Divide1616_Scale( L_frameTCX, L_frame, &res_e ); st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e ); move32(); } } Loading @@ -475,7 +486,7 @@ void decoder_tcx_imdct_fx( Copy( xn_buf_fx + L_frame, hTcxDec->syn_Overl, overlap ); // Q(-2) FOR( Word16 ind = 0; ind < overlapFB; ind++ ) { hTcxDec->syn_OverlFB[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + L_frameTCX )], sub( q_x, q_win ) ); // q_x hTcxDec->syn_OverlFB[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ); // q_x } } Loading @@ -483,7 +494,7 @@ void decoder_tcx_imdct_fx( Copy( xn_buf_fx + sub( shr( overlap, 1 ), tcx_offset ), synth_fx, L_frame_glob ); // Q(-2) FOR( Word16 ind = 0; ind < L_frameTCX_glob; ind++ ) { synthFB_fx[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], sub( q_x, q_win ) ); // q_x synthFB_fx[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ); // q_x } Loading lib_dec/ivas_binRenderer_internal.c +40 −41 Original line number Diff line number Diff line Loading @@ -70,6 +70,7 @@ static void ivas_binRenderer_filterModule_fx( Word32 *filterStatesLeftRealPtr_fx, *filterStatesLeftImagPtr_fx; Word16 *Q_filterStates; const Word32 *filterTapsLeftRealPtr_fx, *filterTapsLeftImagPtr_fx, *filterTapsRightRealPtr_fx, *filterTapsRightImagPtr_fx; Word16 shift_q; FOR( bandIdx = 0; bandIdx < hBinRenderer->conv_band; bandIdx++ ) { Loading @@ -87,11 +88,6 @@ static void ivas_binRenderer_filterModule_fx( FOR( k = 0; k < numTimeSlots; k++ ) { Word64 outRealLeft_fx = 0, outRealRight_fx = 0, outImagLeft_fx = 0, outImagRight_fx = 0; Word64 W_sub1 = 0, W_add1 = 0, W_sub2 = 0, W_add2 = 0; move64(); move64(); move64(); move64(); move64(); move64(); move64(); Loading @@ -104,31 +100,32 @@ static void ivas_binRenderer_filterModule_fx( filterStatesLeftImagPtr_fx[tapIdx] = filterStatesLeftImagPtr_fx[tapIdx - 1]; move32(); W_sub1 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1] W_add1 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1] W_sub2 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1] W_add2 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1] shift_q = sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ); outRealLeft_fx = W_shr( outRealLeft_fx, shift_q ); outImagLeft_fx = W_shr( outImagLeft_fx, shift_q ); outRealRight_fx = W_shr( outRealRight_fx, shift_q ); outImagRight_fx = W_shr( outImagRight_fx, shift_q ); outRealLeft_fx = W_shr( outRealLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) ); outImagLeft_fx = W_shr( outImagLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) ); outRealRight_fx = W_shr( outRealRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) ); outImagRight_fx = W_shr( outImagRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) ); outRealLeft_fx = W_mac_32_32( outRealLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ); outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsLeftImagPtr_fx[tapIdx] ); // Q30 + Q_filterStates[tapIdx - 1] Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1]; move16(); outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ); outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ); /* Left Real and Imag */ outRealLeft_fx = W_add( outRealLeft_fx, W_sub1 ); // Q29 + Q_filterStates[1] outImagLeft_fx = W_add( outImagLeft_fx, W_add1 ); // Q29 + Q_filterStates[1] outRealRight_fx = W_mac_32_32( outRealRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ); outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsRightImagPtr_fx[tapIdx] ); /* Right Real and Imag*/ outRealRight_fx = W_add( outRealRight_fx, W_sub2 ); // Q29 + Q_filterStates[1] outImagRight_fx = W_add( outImagRight_fx, W_add2 ); // Q29 + Q_filterStates[1] outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ); outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ); Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1]; move16(); } shift_q = add( sub( Q_filterStates[1], Q_curr ), 1 ); outRealLeft_fx = W_shr( outRealLeft_fx, shift_q ); outImagLeft_fx = W_shr( outImagLeft_fx, shift_q ); outRealRight_fx = W_shr( outRealRight_fx, shift_q ); outImagRight_fx = W_shr( outImagRight_fx, shift_q ); filterStatesLeftRealPtr_fx[0] = CLDFB_real[chIdx][k][bandIdx]; move32(); Loading @@ -141,27 +138,29 @@ static void ivas_binRenderer_filterModule_fx( /* Left Real and Imag */ // Q29 + Q_curr out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx], W_add( W_shr( outRealLeft_fx, sub( Q_filterStates[1], Q_curr ) ), W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftRealPtr_fx[0] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftImagPtr_fx[0] ) ) ) ); // Q29 Word32 temp1 = L_shr( filterStatesLeftRealPtr_fx[0], 1 ); Word32 temp2 = L_shr( filterStatesLeftImagPtr_fx[0], 1 ); outRealLeft_fx = W_mac_32_32( outRealLeft_fx, temp1, filterTapsLeftRealPtr_fx[0] ); outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( temp2 ), filterTapsLeftImagPtr_fx[0] ); out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx], outRealLeft_fx ); // Q29 move64(); out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx], W_add( W_shr( outImagLeft_fx, sub( Q_filterStates[1], Q_curr ) ), W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftImagPtr_fx[0] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftRealPtr_fx[0] ) ) ) ); // Q29 outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp1, filterTapsLeftImagPtr_fx[0] ); outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp2, filterTapsLeftRealPtr_fx[0] ); out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx], outImagLeft_fx ); // Q29 move64(); /* Right Real and Imag */ out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx], W_add( W_shr( outRealRight_fx, sub( Q_filterStates[1], Q_curr ) ), W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightRealPtr_fx[0] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightImagPtr_fx[0] ) ) ) ); // Q29 outRealRight_fx = W_mac_32_32( outRealRight_fx, temp1, filterTapsRightRealPtr_fx[0] ); outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( temp2 ), filterTapsRightImagPtr_fx[0] ); out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx], outRealRight_fx ); // Q29 move64(); out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx], W_add( W_shr( outImagRight_fx, sub( Q_filterStates[1], Q_curr ) ), W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightImagPtr_fx[0] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightRealPtr_fx[0] ) ) ) ); // Q29 outImagRight_fx = W_mac_32_32( outImagRight_fx, temp1, filterTapsRightImagPtr_fx[0] ); outImagRight_fx = W_mac_32_32( outImagRight_fx, temp2, filterTapsRightRealPtr_fx[0] ); out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx], outImagRight_fx ); // Q29 move64(); } } Loading lib_dec/ivas_dirac_output_synthesis_cov.c +15 −42 Original line number Diff line number Diff line Loading @@ -410,8 +410,6 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx( const Word16 nchan_in /* i : number of input channels */ ) { Word16 cx_init_e; Word16 cx_init_imag_e; Word16 band_idx, ch_idx; Word16 brange[2]; Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS]; Loading @@ -421,10 +419,9 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx( Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS]; Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS]; Word16 output_e; Word16 i, j, tmp1, tmp2, tmp1_e, tmp2_e, shift_imag, shift_real; Word32 L_tmp; Word16 tmp1_e, tmp2_e, shift_imag, shift_real; Word16 band, num_bands; Word16 cx_fx_norm, cx_imag_fx_norm; /* estimate input covariance */ /* Already stack here instead of in the process_subframe */ Loading @@ -451,8 +448,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx( move16(); imag_in_e = ImagBuffer_e; move16(); shift_real = sub( L_norm_arr( real_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) ); shift_imag = sub( L_norm_arr( imag_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) ); Word16 buf_len = imult1616( num_bands, nchan_in ); shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) ); shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) ); real_in_e = sub( real_in_e, shift_real ); imag_in_e = sub( imag_in_e, shift_imag ); Loading @@ -460,50 +460,23 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx( output_e = s_max( real_in_e, imag_in_e ); FOR( i = 0; i < num_bands * nchan_in; ++i ) { real_in_buffer_fx[i] = L_shr( real_in_buffer_fx[i], sub( output_e, RealBuffer_e ) ); // Q(31-output_e) move32(); imag_in_buffer_fx[i] = L_shr( imag_in_buffer_fx[i], sub( output_e, ImagBuffer_e ) ); // Q(31-output_e) move32(); } scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) ); scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) ); cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e ); v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 ); v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 ); cx_init_e = tmp1_e; move16(); cx_init_imag_e = tmp2_e; move16(); // normalizing both the matrices to a common exponent for a better precision tmp1 = 0; move16(); tmp2 = 0; move16(); FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ ) { L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e ); L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e ); tmp1 = s_max( tmp1, tmp1_e ); tmp2 = s_max( tmp2, tmp2_e ); } cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS ); cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS ); FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ ) { L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e ); cx_fx[j] = L_shr( L_tmp, sub( tmp1, tmp1_e ) ); // Q(31-tmp1) move32(); L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e ); cx_imag_fx[j] = L_shr( L_tmp, sub( tmp2, tmp2_e ) ); // Q(31-tmp2) move32(); } scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm ); scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm ); *cx_e = tmp1; *cx_e = sub( tmp1_e, cx_fx_norm ); move16(); *cx_imag_e = tmp2; *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm ); move16(); return; Loading lib_dec/ivas_svd_dec.c +67 −7 Original line number Diff line number Diff line Loading @@ -912,13 +912,55 @@ static void ApplyRotation_fx( ) { Word16 ch; Word16 temp_exp; *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */ move32(); *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */ move32(); #ifdef SVD_WMOPS_OPT Word16 c_q = sub( 31, c_e ); Word16 s_q = sub( 31, s_e ); Word32 op1, op2; Word16 op_e; // Bring c and s to same Q IF( GT_16( c_q, s_q ) ) { op1 = L_shr( c, sub( c_q, s_q ) ); op2 = s; move32(); op_e = s_q; move16(); } ELSE { op1 = c; move32(); op2 = L_shr( s, sub( s_q, c_q ) ); op_e = c_q; move16(); } op_e = add( op_e, 1 ); // 64 bit mac -> +1 FOR( ch = 0; ch < nChannels; ch++ ) { x11 = singularVector[ch][currentIndex2]; move32(); x12 = singularVector[ch][currentIndex1]; move32(); Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e temp = W_shr( temp, op_e ); // Q(singularVector) singularVector[ch][currentIndex2] = W_sat_l( temp ); // Q(singularVector) move32(); temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e temp = W_shr( temp, op_e ); // Q(singularVector) singularVector[ch][currentIndex1] = W_sat_l( temp ); // Q(singularVector) move32(); } #else #ifndef FIX_MINOR_SVD_WMOPS_MR1010X FOR( ch = 0; ch < nChannels; ch++ ) { Loading Loading @@ -952,6 +994,7 @@ static void ApplyRotation_fx( move32(); } #endif #endif return; Loading Loading @@ -1605,26 +1648,43 @@ static void singularVectorsAccumulationLeft_fx( t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ t_ii_e = add( 1, sub( temp_exp, t_ii_e ) ); #endif Word16 tempe; Word32 temp = BASOP_Util_Divide3232_Scale_cadence( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe ); tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) ); // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) ); FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { norm_y = 0; move32(); norm_y_e = 0; Word64 acc = 0; move64(); Word64 prod[16]; Word16 prod_e[16]; Word16 max_e = -31; move16(); FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */ { #ifndef FIX_1010_OPT_SINGLE_RESCALE norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ #else norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ); prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ); max_e = s_max( max_e, prod_e[k] ); #endif } t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */ { acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) ); } Word16 acc_e = W_norm( acc ); acc = W_shl( acc, acc_e ); norm_y = W_extract_h( acc ); norm_y_e = add( sub( max_e, acc_e ), 1 ); t_jj = Mpy_32_32( temp, norm_y ); #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); t_jj_e = add( tempe, norm_y_e ); #endif FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */ { Loading lib_rend/ivas_dirac_decorr_dec.c +46 −47 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
lib_dec/dec_tcx.c +23 −12 Original line number Diff line number Diff line Loading @@ -237,6 +237,7 @@ void decoder_tcx_imdct_fx( Word16 q_a_itf = 15; Word16 x_e = sub( 31, q_x ); move16(); Word16 shift_q = sub( q_x, q_win ); /*-----------------------------------------------------------------* * Initializations Loading Loading @@ -364,9 +365,10 @@ void decoder_tcx_imdct_fx( IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) ) { Word16 copy_len = s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); set32_fx( x_tmp_fx, 0, L_FRAME_PLUS ); Copy32( x_fx, x_tmp_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) ); // q_x Copy32( x_fx, xn_bufFB_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) ); // q_x Copy32( x_fx, x_tmp_fx, copy_len ); // q_x Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x } ELSE IF( ( st->element_mode == EVS_MONO ) ) { Loading @@ -374,8 +376,9 @@ void decoder_tcx_imdct_fx( } ELSE { Copy32( x_fx, x_tmp_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x Copy32( x_fx, xn_bufFB_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x Word16 copy_len = s_max( L_spec, s_max( L_frame, L_frameTCX ) ); Copy32( x_fx, x_tmp_fx, copy_len ); // q_x Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x } IF( ( st->igf != 0 ) ) Loading Loading @@ -416,24 +419,29 @@ void decoder_tcx_imdct_fx( FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ ) { xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], sub( q_x, q_win ) ) ); // q_x xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], shift_q ) ); // q_x move16(); } Word16 ratio_e; Word16 ratio = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &ratio_e ); // Q = 15-ratio_e. * FSCALE_DENOM is (1 << 9) ratio = shr( ratio, sub( 6, ratio_e ) ); IF( st->element_mode != EVS_MONO ) { IMDCT_ivas_fx( x_tmp_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, max( L_frameTCX, L_spec ) >> 1, L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win ); kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); } ELSE { IMDCT_ivas_fx( x_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index, kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win ); kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win ); } FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ ) { xn_bufFB_fx[ind] = L_shl( xn_bufFB_fx_16[ind], sub( q_x, q_win ) ); // Q_x xn_bufFB_fx[ind] = L_shl( L_deposit_l( xn_bufFB_fx_16[ind] ), shift_q ); // Q_x } IF( ( bfi == 0 ) ) Loading @@ -453,19 +461,22 @@ void decoder_tcx_imdct_fx( IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) ) { res_m = BASOP_Util_Divide1616_Scale( L_frame_glob, L_FRAME, &res_e ); st->old_fpitch = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e ); // Using sat as a single instruction shifts and extracts st->old_fpitch = W_shl_sat_l( W_mult0_32_32( st->old_fpitch, L_frame_glob ), -8 ); // Divide by 256 ==> SHR by 8 move32(); } IF( GT_16( st->element_mode, EVS_MONO ) ) { res_m = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &res_e ); st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e ); move32(); } ELSE { res_m = BASOP_Util_Divide1616_Scale( L_frameTCX, L_frame, &res_e ); st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e ); move32(); } } Loading @@ -475,7 +486,7 @@ void decoder_tcx_imdct_fx( Copy( xn_buf_fx + L_frame, hTcxDec->syn_Overl, overlap ); // Q(-2) FOR( Word16 ind = 0; ind < overlapFB; ind++ ) { hTcxDec->syn_OverlFB[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + L_frameTCX )], sub( q_x, q_win ) ); // q_x hTcxDec->syn_OverlFB[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ); // q_x } } Loading @@ -483,7 +494,7 @@ void decoder_tcx_imdct_fx( Copy( xn_buf_fx + sub( shr( overlap, 1 ), tcx_offset ), synth_fx, L_frame_glob ); // Q(-2) FOR( Word16 ind = 0; ind < L_frameTCX_glob; ind++ ) { synthFB_fx[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], sub( q_x, q_win ) ); // q_x synthFB_fx[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ); // q_x } Loading
lib_dec/ivas_binRenderer_internal.c +40 −41 Original line number Diff line number Diff line Loading @@ -70,6 +70,7 @@ static void ivas_binRenderer_filterModule_fx( Word32 *filterStatesLeftRealPtr_fx, *filterStatesLeftImagPtr_fx; Word16 *Q_filterStates; const Word32 *filterTapsLeftRealPtr_fx, *filterTapsLeftImagPtr_fx, *filterTapsRightRealPtr_fx, *filterTapsRightImagPtr_fx; Word16 shift_q; FOR( bandIdx = 0; bandIdx < hBinRenderer->conv_band; bandIdx++ ) { Loading @@ -87,11 +88,6 @@ static void ivas_binRenderer_filterModule_fx( FOR( k = 0; k < numTimeSlots; k++ ) { Word64 outRealLeft_fx = 0, outRealRight_fx = 0, outImagLeft_fx = 0, outImagRight_fx = 0; Word64 W_sub1 = 0, W_add1 = 0, W_sub2 = 0, W_add2 = 0; move64(); move64(); move64(); move64(); move64(); move64(); move64(); Loading @@ -104,31 +100,32 @@ static void ivas_binRenderer_filterModule_fx( filterStatesLeftImagPtr_fx[tapIdx] = filterStatesLeftImagPtr_fx[tapIdx - 1]; move32(); W_sub1 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1] W_add1 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1] W_sub2 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1] W_add2 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1] shift_q = sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ); outRealLeft_fx = W_shr( outRealLeft_fx, shift_q ); outImagLeft_fx = W_shr( outImagLeft_fx, shift_q ); outRealRight_fx = W_shr( outRealRight_fx, shift_q ); outImagRight_fx = W_shr( outImagRight_fx, shift_q ); outRealLeft_fx = W_shr( outRealLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) ); outImagLeft_fx = W_shr( outImagLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) ); outRealRight_fx = W_shr( outRealRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) ); outImagRight_fx = W_shr( outImagRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) ); outRealLeft_fx = W_mac_32_32( outRealLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ); outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsLeftImagPtr_fx[tapIdx] ); // Q30 + Q_filterStates[tapIdx - 1] Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1]; move16(); outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ); outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ); /* Left Real and Imag */ outRealLeft_fx = W_add( outRealLeft_fx, W_sub1 ); // Q29 + Q_filterStates[1] outImagLeft_fx = W_add( outImagLeft_fx, W_add1 ); // Q29 + Q_filterStates[1] outRealRight_fx = W_mac_32_32( outRealRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ); outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsRightImagPtr_fx[tapIdx] ); /* Right Real and Imag*/ outRealRight_fx = W_add( outRealRight_fx, W_sub2 ); // Q29 + Q_filterStates[1] outImagRight_fx = W_add( outImagRight_fx, W_add2 ); // Q29 + Q_filterStates[1] outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ); outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ); Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1]; move16(); } shift_q = add( sub( Q_filterStates[1], Q_curr ), 1 ); outRealLeft_fx = W_shr( outRealLeft_fx, shift_q ); outImagLeft_fx = W_shr( outImagLeft_fx, shift_q ); outRealRight_fx = W_shr( outRealRight_fx, shift_q ); outImagRight_fx = W_shr( outImagRight_fx, shift_q ); filterStatesLeftRealPtr_fx[0] = CLDFB_real[chIdx][k][bandIdx]; move32(); Loading @@ -141,27 +138,29 @@ static void ivas_binRenderer_filterModule_fx( /* Left Real and Imag */ // Q29 + Q_curr out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx], W_add( W_shr( outRealLeft_fx, sub( Q_filterStates[1], Q_curr ) ), W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftRealPtr_fx[0] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftImagPtr_fx[0] ) ) ) ); // Q29 Word32 temp1 = L_shr( filterStatesLeftRealPtr_fx[0], 1 ); Word32 temp2 = L_shr( filterStatesLeftImagPtr_fx[0], 1 ); outRealLeft_fx = W_mac_32_32( outRealLeft_fx, temp1, filterTapsLeftRealPtr_fx[0] ); outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( temp2 ), filterTapsLeftImagPtr_fx[0] ); out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx], outRealLeft_fx ); // Q29 move64(); out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx], W_add( W_shr( outImagLeft_fx, sub( Q_filterStates[1], Q_curr ) ), W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftImagPtr_fx[0] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftRealPtr_fx[0] ) ) ) ); // Q29 outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp1, filterTapsLeftImagPtr_fx[0] ); outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp2, filterTapsLeftRealPtr_fx[0] ); out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx], outImagLeft_fx ); // Q29 move64(); /* Right Real and Imag */ out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx], W_add( W_shr( outRealRight_fx, sub( Q_filterStates[1], Q_curr ) ), W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightRealPtr_fx[0] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightImagPtr_fx[0] ) ) ) ); // Q29 outRealRight_fx = W_mac_32_32( outRealRight_fx, temp1, filterTapsRightRealPtr_fx[0] ); outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( temp2 ), filterTapsRightImagPtr_fx[0] ); out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx], outRealRight_fx ); // Q29 move64(); out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx], W_add( W_shr( outImagRight_fx, sub( Q_filterStates[1], Q_curr ) ), W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightImagPtr_fx[0] ), W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightRealPtr_fx[0] ) ) ) ); // Q29 outImagRight_fx = W_mac_32_32( outImagRight_fx, temp1, filterTapsRightImagPtr_fx[0] ); outImagRight_fx = W_mac_32_32( outImagRight_fx, temp2, filterTapsRightRealPtr_fx[0] ); out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx], outImagRight_fx ); // Q29 move64(); } } Loading
lib_dec/ivas_dirac_output_synthesis_cov.c +15 −42 Original line number Diff line number Diff line Loading @@ -410,8 +410,6 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx( const Word16 nchan_in /* i : number of input channels */ ) { Word16 cx_init_e; Word16 cx_init_imag_e; Word16 band_idx, ch_idx; Word16 brange[2]; Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS]; Loading @@ -421,10 +419,9 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx( Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS]; Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS]; Word16 output_e; Word16 i, j, tmp1, tmp2, tmp1_e, tmp2_e, shift_imag, shift_real; Word32 L_tmp; Word16 tmp1_e, tmp2_e, shift_imag, shift_real; Word16 band, num_bands; Word16 cx_fx_norm, cx_imag_fx_norm; /* estimate input covariance */ /* Already stack here instead of in the process_subframe */ Loading @@ -451,8 +448,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx( move16(); imag_in_e = ImagBuffer_e; move16(); shift_real = sub( L_norm_arr( real_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) ); shift_imag = sub( L_norm_arr( imag_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) ); Word16 buf_len = imult1616( num_bands, nchan_in ); shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) ); shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) ); real_in_e = sub( real_in_e, shift_real ); imag_in_e = sub( imag_in_e, shift_imag ); Loading @@ -460,50 +460,23 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx( output_e = s_max( real_in_e, imag_in_e ); FOR( i = 0; i < num_bands * nchan_in; ++i ) { real_in_buffer_fx[i] = L_shr( real_in_buffer_fx[i], sub( output_e, RealBuffer_e ) ); // Q(31-output_e) move32(); imag_in_buffer_fx[i] = L_shr( imag_in_buffer_fx[i], sub( output_e, ImagBuffer_e ) ); // Q(31-output_e) move32(); } scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) ); scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) ); cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e ); v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 ); v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 ); cx_init_e = tmp1_e; move16(); cx_init_imag_e = tmp2_e; move16(); // normalizing both the matrices to a common exponent for a better precision tmp1 = 0; move16(); tmp2 = 0; move16(); FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ ) { L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e ); L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e ); tmp1 = s_max( tmp1, tmp1_e ); tmp2 = s_max( tmp2, tmp2_e ); } cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS ); cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS ); FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ ) { L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e ); cx_fx[j] = L_shr( L_tmp, sub( tmp1, tmp1_e ) ); // Q(31-tmp1) move32(); L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e ); cx_imag_fx[j] = L_shr( L_tmp, sub( tmp2, tmp2_e ) ); // Q(31-tmp2) move32(); } scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm ); scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm ); *cx_e = tmp1; *cx_e = sub( tmp1_e, cx_fx_norm ); move16(); *cx_imag_e = tmp2; *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm ); move16(); return; Loading
lib_dec/ivas_svd_dec.c +67 −7 Original line number Diff line number Diff line Loading @@ -912,13 +912,55 @@ static void ApplyRotation_fx( ) { Word16 ch; Word16 temp_exp; *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */ move32(); *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */ move32(); #ifdef SVD_WMOPS_OPT Word16 c_q = sub( 31, c_e ); Word16 s_q = sub( 31, s_e ); Word32 op1, op2; Word16 op_e; // Bring c and s to same Q IF( GT_16( c_q, s_q ) ) { op1 = L_shr( c, sub( c_q, s_q ) ); op2 = s; move32(); op_e = s_q; move16(); } ELSE { op1 = c; move32(); op2 = L_shr( s, sub( s_q, c_q ) ); op_e = c_q; move16(); } op_e = add( op_e, 1 ); // 64 bit mac -> +1 FOR( ch = 0; ch < nChannels; ch++ ) { x11 = singularVector[ch][currentIndex2]; move32(); x12 = singularVector[ch][currentIndex1]; move32(); Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e temp = W_shr( temp, op_e ); // Q(singularVector) singularVector[ch][currentIndex2] = W_sat_l( temp ); // Q(singularVector) move32(); temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e temp = W_shr( temp, op_e ); // Q(singularVector) singularVector[ch][currentIndex1] = W_sat_l( temp ); // Q(singularVector) move32(); } #else #ifndef FIX_MINOR_SVD_WMOPS_MR1010X FOR( ch = 0; ch < nChannels; ch++ ) { Loading Loading @@ -952,6 +994,7 @@ static void ApplyRotation_fx( move32(); } #endif #endif return; Loading Loading @@ -1605,26 +1648,43 @@ static void singularVectorsAccumulationLeft_fx( t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */ t_ii_e = add( 1, sub( temp_exp, t_ii_e ) ); #endif Word16 tempe; Word32 temp = BASOP_Util_Divide3232_Scale_cadence( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe ); tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) ); // fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) ); FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { norm_y = 0; move32(); norm_y_e = 0; Word64 acc = 0; move64(); Word64 prod[16]; Word16 prod_e[16]; Word16 max_e = -31; move16(); FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */ { #ifndef FIX_1010_OPT_SINGLE_RESCALE norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ #else norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ); prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ); max_e = s_max( max_e, prod_e[k] ); #endif } t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e, FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */ { acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) ); } Word16 acc_e = W_norm( acc ); acc = W_shl( acc, acc_e ); norm_y = W_extract_h( acc ); norm_y_e = add( sub( max_e, acc_e ), 1 ); t_jj = Mpy_32_32( temp, norm_y ); #ifndef FIX_1010_OPT_SINGLE_RESCALE t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); #else t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) ); t_jj_e = add( tempe, norm_y_e ); #endif FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */ { Loading
lib_rend/ivas_dirac_decorr_dec.c +46 −47 File changed.Preview size limit exceeded, changes collapsed. Show changes