experiment to pass h1 without scaling (2c5740bf) · Commits · SA4 / Audio / IVAS BASOP

lib_dec/dec_tcx.c

+23 −12

Original line number	Diff line number	Diff line
		@@ -237,6 +237,7 @@ void decoder_tcx_imdct_fx(
		Word16 q_a_itf = 15;
		Word16 x_e = sub( 31, q_x );
		move16();
		Word16 shift_q = sub( q_x, q_win );

		/*-----------------------------------------------------------------*
		* Initializations
		@@ -364,9 +365,10 @@ void decoder_tcx_imdct_fx(

		IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) )
		{
		Word16 copy_len = s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) );
		set32_fx( x_tmp_fx, 0, L_FRAME_PLUS );
		Copy32( x_fx, x_tmp_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) ); // q_x
		Copy32( x_fx, xn_bufFB_fx, s_min( L_FRAME48k, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ) ); // q_x
		Copy32( x_fx, x_tmp_fx, copy_len ); // q_x
		Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x
		}
		ELSE IF( ( st->element_mode == EVS_MONO ) )
		{
		@@ -374,8 +376,9 @@ void decoder_tcx_imdct_fx(
		}
		ELSE
		{
		Copy32( x_fx, x_tmp_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x
		Copy32( x_fx, xn_bufFB_fx, s_max( L_spec, s_max( L_frame, L_frameTCX ) ) ); // q_x
		Word16 copy_len = s_max( L_spec, s_max( L_frame, L_frameTCX ) );
		Copy32( x_fx, x_tmp_fx, copy_len ); // q_x
		Copy32( x_fx, xn_bufFB_fx, copy_len ); // q_x
		}

		IF( ( st->igf != 0 ) )
		@@ -416,24 +419,29 @@ void decoder_tcx_imdct_fx(

		FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ )
		{
		xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], sub( q_x, q_win ) ) ); // q_x
		xn_bufFB_fx_16[ind] = extract_l( L_shr( xn_bufFB_fx[ind], shift_q ) ); // q_x
		move16();
		}

		Word16 ratio_e;
		Word16 ratio = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &ratio_e ); // Q = 15-ratio_e. * FSCALE_DENOM is (1 << 9)
		ratio = shr( ratio, sub( 6, ratio_e ) );

		IF( st->element_mode != EVS_MONO )
		{
		IMDCT_ivas_fx( x_tmp_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB,
		hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index,
		kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, max( L_frameTCX, L_spec ) >> 1, L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win );
		kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win );
		}
		ELSE
		{

		IMDCT_ivas_fx( x_fx, q_x, hTcxDec->syn_OverlFB, hTcxDec->syn_Overl_TDACFB, xn_bufFB_fx_16, hTcxCfg->tcx_aldo_window_1_FB, hTcxCfg->tcx_aldo_window_1_FB_trunc, hTcxCfg->tcx_aldo_window_2_FB, hTcxCfg->tcx_mdct_window_halfFB, hTcxCfg->tcx_mdct_window_minimumFB, hTcxCfg->tcx_mdct_window_transFB, hTcxCfg->tcx_mdct_window_half_lengthFB, hTcxCfg->tcx_mdct_window_min_lengthFB, index,
		kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, FSCALE_DENOM * L_frameTCX_glob / L_frame_glob, acelp_zir_fx, q_win );
		kernelType, left_rect, tcx_offsetFB, overlapFB, L_frameTCX, L_frameTCX, shr( s_max( L_frameTCX, L_spec ), 1 ), L_frameTCX_glob, frame_cnt, bfi, st->hHQ_core->old_out_fx, 1, st, ratio, acelp_zir_fx, q_win );
		}
		FOR( Word16 ind = 0; ind < L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX; ind++ )
		{
		xn_bufFB_fx[ind] = L_shl( xn_bufFB_fx_16[ind], sub( q_x, q_win ) ); // Q_x
		xn_bufFB_fx[ind] = L_shl( L_deposit_l( xn_bufFB_fx_16[ind] ), shift_q ); // Q_x
		}

		IF( ( bfi == 0 ) )
		@@ -453,19 +461,22 @@ void decoder_tcx_imdct_fx(

		IF( EQ_16( st->element_mode, IVAS_CPE_MDCT ) )
		{
		res_m = BASOP_Util_Divide1616_Scale( L_frame_glob, L_FRAME, &res_e );
		st->old_fpitch = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
		// Using sat as a single instruction shifts and extracts
		st->old_fpitch = W_shl_sat_l( W_mult0_32_32( st->old_fpitch, L_frame_glob ), -8 ); // Divide by 256 ==> SHR by 8
		move32();
		}

		IF( GT_16( st->element_mode, EVS_MONO ) )
		{
		res_m = BASOP_Util_Divide1616_Scale( L_frameTCX_glob, L_frame_glob, &res_e );
		st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
		move32();
		}
		ELSE
		{
		res_m = BASOP_Util_Divide1616_Scale( L_frameTCX, L_frame, &res_e );
		st->old_fpitchFB = L_shl( Mpy_32_16_1( st->old_fpitch, res_m ), res_e );
		move32();
		}
		}

		@@ -475,7 +486,7 @@ void decoder_tcx_imdct_fx(
		Copy( xn_buf_fx + L_frame, hTcxDec->syn_Overl, overlap ); // Q(-2)
		FOR( Word16 ind = 0; ind < overlapFB; ind++ )
		{
		hTcxDec->syn_OverlFB[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + L_frameTCX )], sub( q_x, q_win ) ); // q_x
		hTcxDec->syn_OverlFB[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + L_frameTCX )], shift_q ) ); // q_x
		}
		}

		@@ -483,7 +494,7 @@ void decoder_tcx_imdct_fx(
		Copy( xn_buf_fx + sub( shr( overlap, 1 ), tcx_offset ), synth_fx, L_frame_glob ); // Q(-2)
		FOR( Word16 ind = 0; ind < L_frameTCX_glob; ind++ )
		{
		synthFB_fx[ind] = (Word16) L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], sub( q_x, q_win ) ); // q_x
		synthFB_fx[ind] = extract_l( L_shr( xn_bufFB_fx[( ind + ( ( overlapFB >> 1 ) - tcx_offsetFB ) )], shift_q ) ); // q_x
		}

lib_dec/ivas_binRenderer_internal.c

+40 −41

Original line number	Diff line number	Diff line
		@@ -70,6 +70,7 @@ static void ivas_binRenderer_filterModule_fx(
		Word32 filterStatesLeftRealPtr_fx, filterStatesLeftImagPtr_fx;
		Word16 *Q_filterStates;
		const Word32 filterTapsLeftRealPtr_fx, filterTapsLeftImagPtr_fx, filterTapsRightRealPtr_fx, filterTapsRightImagPtr_fx;
		Word16 shift_q;

		FOR( bandIdx = 0; bandIdx < hBinRenderer->conv_band; bandIdx++ )
		{
		@@ -87,11 +88,6 @@ static void ivas_binRenderer_filterModule_fx(
		FOR( k = 0; k < numTimeSlots; k++ )
		{
		Word64 outRealLeft_fx = 0, outRealRight_fx = 0, outImagLeft_fx = 0, outImagRight_fx = 0;
		Word64 W_sub1 = 0, W_add1 = 0, W_sub2 = 0, W_add2 = 0;
		move64();
		move64();
		move64();
		move64();
		move64();
		move64();
		move64();
		@@ -104,31 +100,32 @@ static void ivas_binRenderer_filterModule_fx(
		filterStatesLeftImagPtr_fx[tapIdx] = filterStatesLeftImagPtr_fx[tapIdx - 1];
		move32();

		W_sub1 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ),
		W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
		W_add1 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] ),
		W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
		W_sub2 = W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ),
		W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
		W_add2 = W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] ),
		W_mult0_32_32( filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] ) ); // Q29 + Q_filterStates[tapIdx - 1]
		shift_q = sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] );
		outRealLeft_fx = W_shr( outRealLeft_fx, shift_q );
		outImagLeft_fx = W_shr( outImagLeft_fx, shift_q );
		outRealRight_fx = W_shr( outRealRight_fx, shift_q );
		outImagRight_fx = W_shr( outImagRight_fx, shift_q );

		outRealLeft_fx = W_shr( outRealLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
		outImagLeft_fx = W_shr( outImagLeft_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
		outRealRight_fx = W_shr( outRealRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
		outImagRight_fx = W_shr( outImagRight_fx, sub( Q_filterStates[tapIdx], Q_filterStates[tapIdx - 1] ) );
		outRealLeft_fx = W_mac_32_32( outRealLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] );
		outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsLeftImagPtr_fx[tapIdx] ); // Q30 + Q_filterStates[tapIdx - 1]

		Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1];
		move16();
		outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsLeftImagPtr_fx[tapIdx] );
		outImagLeft_fx = W_mac_32_32( outImagLeft_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsLeftRealPtr_fx[tapIdx] );

		/* Left Real and Imag */
		outRealLeft_fx = W_add( outRealLeft_fx, W_sub1 ); // Q29 + Q_filterStates[1]
		outImagLeft_fx = W_add( outImagLeft_fx, W_add1 ); // Q29 + Q_filterStates[1]
		outRealRight_fx = W_mac_32_32( outRealRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] );
		outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( filterStatesLeftImagPtr_fx[tapIdx] ), filterTapsRightImagPtr_fx[tapIdx] );

		/* Right Real and Imag*/
		outRealRight_fx = W_add( outRealRight_fx, W_sub2 ); // Q29 + Q_filterStates[1]
		outImagRight_fx = W_add( outImagRight_fx, W_add2 ); // Q29 + Q_filterStates[1]
		outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftRealPtr_fx[tapIdx], filterTapsRightImagPtr_fx[tapIdx] );
		outImagRight_fx = W_mac_32_32( outImagRight_fx, filterStatesLeftImagPtr_fx[tapIdx], filterTapsRightRealPtr_fx[tapIdx] );

		Q_filterStates[tapIdx] = Q_filterStates[tapIdx - 1];
		move16();
		}
		shift_q = add( sub( Q_filterStates[1], Q_curr ), 1 );
		outRealLeft_fx = W_shr( outRealLeft_fx, shift_q );
		outImagLeft_fx = W_shr( outImagLeft_fx, shift_q );
		outRealRight_fx = W_shr( outRealRight_fx, shift_q );
		outImagRight_fx = W_shr( outImagRight_fx, shift_q );

		filterStatesLeftRealPtr_fx[0] = CLDFB_real[chIdx][k][bandIdx];
		move32();
		@@ -141,27 +138,29 @@ static void ivas_binRenderer_filterModule_fx(
		/* Left Real and Imag */
		// Q29 + Q_curr

		out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx],
		W_add( W_shr( outRealLeft_fx, sub( Q_filterStates[1], Q_curr ) ),
		W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftRealPtr_fx[0] ),
		W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftImagPtr_fx[0] ) ) ) ); // Q29
		Word32 temp1 = L_shr( filterStatesLeftRealPtr_fx[0], 1 );
		Word32 temp2 = L_shr( filterStatesLeftImagPtr_fx[0], 1 );


		outRealLeft_fx = W_mac_32_32( outRealLeft_fx, temp1, filterTapsLeftRealPtr_fx[0] );
		outRealLeft_fx = W_mac_32_32( outRealLeft_fx, L_negate( temp2 ), filterTapsLeftImagPtr_fx[0] );
		out_Conv_CLDFB_real[0][k][bandIdx] = W_add( out_Conv_CLDFB_real[0][k][bandIdx], outRealLeft_fx ); // Q29
		move64();
		out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx],
		W_add( W_shr( outImagLeft_fx, sub( Q_filterStates[1], Q_curr ) ),
		W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsLeftImagPtr_fx[0] ),
		W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsLeftRealPtr_fx[0] ) ) ) ); // Q29

		outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp1, filterTapsLeftImagPtr_fx[0] );
		outImagLeft_fx = W_mac_32_32( outImagLeft_fx, temp2, filterTapsLeftRealPtr_fx[0] );
		out_Conv_CLDFB_imag[0][k][bandIdx] = W_add( out_Conv_CLDFB_imag[0][k][bandIdx], outImagLeft_fx ); // Q29
		move64();

		/* Right Real and Imag */
		out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx],
		W_add( W_shr( outRealRight_fx, sub( Q_filterStates[1], Q_curr ) ),
		W_sub( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightRealPtr_fx[0] ),
		W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightImagPtr_fx[0] ) ) ) ); // Q29
		outRealRight_fx = W_mac_32_32( outRealRight_fx, temp1, filterTapsRightRealPtr_fx[0] );
		outRealRight_fx = W_mac_32_32( outRealRight_fx, L_negate( temp2 ), filterTapsRightImagPtr_fx[0] );
		out_Conv_CLDFB_real[1][k][bandIdx] = W_add( out_Conv_CLDFB_real[1][k][bandIdx], outRealRight_fx ); // Q29
		move64();
		out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx],
		W_add( W_shr( outImagRight_fx, sub( Q_filterStates[1], Q_curr ) ),
		W_add( W_mult0_32_32( filterStatesLeftRealPtr_fx[0], filterTapsRightImagPtr_fx[0] ),
		W_mult0_32_32( filterStatesLeftImagPtr_fx[0], filterTapsRightRealPtr_fx[0] ) ) ) ); // Q29

		outImagRight_fx = W_mac_32_32( outImagRight_fx, temp1, filterTapsRightImagPtr_fx[0] );
		outImagRight_fx = W_mac_32_32( outImagRight_fx, temp2, filterTapsRightRealPtr_fx[0] );
		out_Conv_CLDFB_imag[1][k][bandIdx] = W_add( out_Conv_CLDFB_imag[1][k][bandIdx], outImagRight_fx ); // Q29
		move64();
		}
		}

lib_dec/ivas_dirac_output_synthesis_cov.c

+15 −42

Original line number	Diff line number	Diff line
		@@ -410,8 +410,6 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
		const Word16 nchan_in /* i : number of input channels */
		)
		{
		Word16 cx_init_e;
		Word16 cx_init_imag_e;
		Word16 band_idx, ch_idx;
		Word16 brange[2];
		Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
		@@ -421,10 +419,9 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
		Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
		Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
		Word16 output_e;
		Word16 i, j, tmp1, tmp2, tmp1_e, tmp2_e, shift_imag, shift_real;
		Word32 L_tmp;
		Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
		Word16 band, num_bands;

		Word16 cx_fx_norm, cx_imag_fx_norm;
		/* estimate input covariance */
		/* Already stack here instead of in the process_subframe */

		@@ -451,8 +448,11 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
		move16();
		imag_in_e = ImagBuffer_e;
		move16();
		shift_real = sub( L_norm_arr( real_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
		shift_imag = sub( L_norm_arr( imag_in_buffer_fx, imult1616( num_bands, nchan_in ) ), find_guarded_bits_fx( add( num_bands, 1 ) ) );

		Word16 buf_len = imult1616( num_bands, nchan_in );

		shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
		shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );

		real_in_e = sub( real_in_e, shift_real );
		imag_in_e = sub( imag_in_e, shift_imag );
		@@ -460,50 +460,23 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(

		output_e = s_max( real_in_e, imag_in_e );

		FOR( i = 0; i < num_bands * nchan_in; ++i )
		{
		real_in_buffer_fx[i] = L_shr( real_in_buffer_fx[i], sub( output_e, RealBuffer_e ) ); // Q(31-output_e)
		move32();
		imag_in_buffer_fx[i] = L_shr( imag_in_buffer_fx[i], sub( output_e, ImagBuffer_e ) ); // Q(31-output_e)
		move32();
		}
		scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
		scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );

		cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
		v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );

		v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
		cx_init_e = tmp1_e;
		move16();
		cx_init_imag_e = tmp2_e;
		move16();

		// normalizing both the matrices to a common exponent for a better precision
		tmp1 = 0;
		move16();
		tmp2 = 0;
		move16();

		FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ )
		{
		L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e );
		L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e );
		tmp1 = s_max( tmp1, tmp1_e );
		tmp2 = s_max( tmp2, tmp2_e );
		}
		cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
		cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );

		FOR( j = 0; j < PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS; j++ )
		{
		L_tmp = BASOP_Util_Add_Mant32Exp( cx_fx[j], cx_init_e, 0, 0, &tmp1_e );
		cx_fx[j] = L_shr( L_tmp, sub( tmp1, tmp1_e ) ); // Q(31-tmp1)
		move32();
		L_tmp = BASOP_Util_Add_Mant32Exp( cx_imag_fx[j], cx_init_imag_e, 0, 0, &tmp2_e );
		cx_imag_fx[j] = L_shr( L_tmp, sub( tmp2, tmp2_e ) ); // Q(31-tmp2)
		move32();
		}
		scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
		scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );

		*cx_e = tmp1;
		*cx_e = sub( tmp1_e, cx_fx_norm );
		move16();
		*cx_imag_e = tmp2;
		*cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
		move16();

		return;

lib_dec/ivas_svd_dec.c

+67 −7

Original line number	Diff line number	Diff line
		@@ -912,13 +912,55 @@ static void ApplyRotation_fx(
		)
		{
		Word16 ch;
		Word16 temp_exp;

		d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add( c_e, x11_e ), Mpy_32_32( s, x12 ), add( s_e, x12_e ), d_e ); /* exp(d_e) */
		move32();
		g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add( c_e, x12_e ), Mpy_32_32( L_negate( s ), x11 ), add( s_e, x11_e ), g_e ); /* exp(g_e) */
		move32();

		#ifdef SVD_WMOPS_OPT
		Word16 c_q = sub( 31, c_e );
		Word16 s_q = sub( 31, s_e );
		Word32 op1, op2;
		Word16 op_e;

		// Bring c and s to same Q
		IF( GT_16( c_q, s_q ) )
		{
		op1 = L_shr( c, sub( c_q, s_q ) );
		op2 = s;
		move32();
		op_e = s_q;
		move16();
		}
		ELSE
		{
		op1 = c;
		move32();
		op2 = L_shr( s, sub( s_q, c_q ) );
		op_e = c_q;
		move16();
		}
		op_e = add( op_e, 1 ); // 64 bit mac -> +1

		FOR( ch = 0; ch < nChannels; ch++ )
		{
		x11 = singularVector[ch][currentIndex2];
		move32();
		x12 = singularVector[ch][currentIndex1];
		move32();

		Word64 temp = W_mac_32_32( W_mult_32_32( op1, x11 ), op2, x12 ); // Q(singularVector) + op_e
		temp = W_shr( temp, op_e ); // Q(singularVector)
		singularVector[ch][currentIndex2] = W_sat_l( temp ); // Q(singularVector)
		move32();

		temp = W_mac_32_32( W_mult_32_32( op1, x12 ), L_negate( op2 ), x11 ); // Q(singularVector) + op_e
		temp = W_shr( temp, op_e ); // Q(singularVector)
		singularVector[ch][currentIndex1] = W_sat_l( temp ); // Q(singularVector)
		move32();
		}
		#else
		#ifndef FIX_MINOR_SVD_WMOPS_MR1010X
		FOR( ch = 0; ch < nChannels; ch++ )
		{
		@@ -952,6 +994,7 @@ static void ApplyRotation_fx(
		move32();
		}

		#endif
		#endif

		return;
		@@ -1605,26 +1648,43 @@ static void singularVectorsAccumulationLeft_fx(
		t_ii = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxWithSign_fx( t_ii ), &temp_exp ); /* exp(1 + (temp_exp + tii_e)) */
		t_ii_e = add( 1, sub( temp_exp, t_ii_e ) );
		#endif
		Word16 tempe;
		Word32 temp = BASOP_Util_Divide3232_Scale_cadence( t_ii, maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &tempe );
		tempe = add( tempe, sub( t_ii_e, singularVectors_Left_e[nCh][nCh] ) );
		// fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
		FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
		{
		norm_y = 0;
		move32();
		norm_y_e = 0;
		Word64 acc = 0;
		move64();
		Word64 prod[16];
		Word16 prod_e[16];
		Word16 max_e = -31;
		move16();
		FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
		{
		#ifndef FIX_1010_OPT_SINGLE_RESCALE
		norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
		#else
		norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
		prod[k] = W_mult0_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] );
		prod_e[k] = add( singularVectors_Left_e[k][nCh], singularVectors_Left_e[k][iCh] );
		max_e = s_max( max_e, prod_e[k] );
		#endif
		}
		t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); // t_ii_e+norm_y_e-*singularVectors_e,

		FOR( k = nCh + 1; k < nChannelsL; k++ ) /* nChannelsL */
		{
		acc = W_add( acc, W_shr( prod[k], sub( max_e, prod_e[k] ) ) );
		}
		Word16 acc_e = W_norm( acc );
		acc = W_shl( acc, acc_e );

		norm_y = W_extract_h( acc );
		norm_y_e = add( sub( max_e, acc_e ), 1 );
		t_jj = Mpy_32_32( temp, norm_y );
		#ifndef FIX_1010_OPT_SINGLE_RESCALE
		t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) );
		#else
		t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), singularVectors_Left_e[nCh][nCh] ) );
		t_jj_e = add( tempe, norm_y_e );
		#endif
		FOR( k = nCh; k < nChannelsL; k++ ) /* nChannelsL */
		{

lib_rend/ivas_dirac_decorr_dec.c

+46 −47

File changed.

Preview size limit exceeded, changes collapsed.