Optimize ivas_filter_process_fx part 3. (43a92380) · Commits · SA4 / Audio / IVAS BASOP

lib_com/ivas_filters_fx.c

+71 −117

Original line number	Diff line number	Diff line
		@@ -43,7 +43,7 @@
		 *------------------------------------------------------------------------------------------*/

		static void ivas_iir_2_filter_fx( ivas_filters_process_state_t filter_state, Word32 pIn_Out_fx, const Word16 length, const Word16 stage, Word16 *pIn_Out_e );
		static void ivas_iir_2_filter_fixed_fx( ivas_filters_process_state_t filter_state, Word32 pIn_Out_fx, const Word16 length, const Word16 stage, Word16 *pIn_Out_e );
		static void ivas_iir_2_filter_fixed_fx( ivas_filters_process_state_t filter_state, Word32 pIn_Out_fx, const Word16 length, const Word16 stage, Word16 q );


		/*-----------------------------------------------------------------------------------------*
		@@ -79,6 +79,12 @@ void ivas_filters_init_fx(
		move32();
		move16();
		move16();
		#ifdef OPT_2239_IVAS_FILTER_PROCESS
		filter_state->num_q[IVAS_FILTER_STAGE_0][i] = Q30;
		filter_state->den_q[IVAS_FILTER_STAGE_0][i] = Q30;
		move16();
		move16();
		#endif
		}

		filter_state->state_fx[0][0] = 0;
		@@ -94,15 +100,27 @@ void ivas_filters_init_fx(
		move16();
		move16();

		filter_state->state64_fx[0][0] = 0;
		filter_state->state64_fx[0][1] = 0;
		filter_state->state64_fx[0][2] = 0;
		#ifdef OPT_2239_IVAS_FILTER_PROCESS
		filter_state->state64_fx[IVAS_FILTER_STAGE_0][0] = 0;
		filter_state->state64_fx[IVAS_FILTER_STAGE_0][1] = 0;
		filter_state->state64_fx[IVAS_FILTER_STAGE_0][2] = 0;
		move64();
		move64();
		move64();
		filter_state->state64_q[IVAS_FILTER_STAGE_0] = Q30;
		move16();
		#endif
		}
		ELSE
		{
		filter_state->filt_len = IVAS_BIQUAD_FILT_LEN;
		move16();

		#ifdef OPT_2239_IVAS_FILTER_PROCESS
		filter_state->state64_q[IVAS_FILTER_STAGE_0] = 63;
		filter_state->state64_q[IVAS_FILTER_STAGE_1] = 63;
		#endif

		FOR( i = 0; i < IVAS_BIQUAD_FILT_LEN; i++ )
		{

		@@ -122,6 +140,24 @@ void ivas_filters_init_fx(
		move16();
		move16();
		move16();
		#ifdef OPT_2239_IVAS_FILTER_PROCESS
		filter_state->num_q[IVAS_FILTER_STAGE_0][i] = sub( 31, filt_coeff_e[i + 0 * IVAS_BIQUAD_FILT_LEN] );
		filter_state->den_q[IVAS_FILTER_STAGE_0][i] = sub( 31, filt_coeff_e[i + 1 * IVAS_BIQUAD_FILT_LEN] );
		filter_state->num_q[IVAS_FILTER_STAGE_1][i] = sub( 31, filt_coeff_e[i + 2 * IVAS_BIQUAD_FILT_LEN] );
		filter_state->den_q[IVAS_FILTER_STAGE_1][i] = sub( 31, filt_coeff_e[i + 3 * IVAS_BIQUAD_FILT_LEN] );
		move16();
		move16();
		move16();
		move16();
		filter_state->state64_q[IVAS_FILTER_STAGE_0] = s_min( filter_state->state64_q[IVAS_FILTER_STAGE_0], filter_state->num_q[IVAS_FILTER_STAGE_0][i] );
		filter_state->state64_q[IVAS_FILTER_STAGE_0] = s_min( filter_state->state64_q[IVAS_FILTER_STAGE_0], filter_state->den_q[IVAS_FILTER_STAGE_0][i] );
		filter_state->state64_q[IVAS_FILTER_STAGE_1] = s_min( filter_state->state64_q[IVAS_FILTER_STAGE_1], filter_state->num_q[IVAS_FILTER_STAGE_1][i] );
		filter_state->state64_q[IVAS_FILTER_STAGE_1] = s_min( filter_state->state64_q[IVAS_FILTER_STAGE_1], filter_state->den_q[IVAS_FILTER_STAGE_1][i] );
		move16();
		move16();
		move16();
		move16();
		#endif
		}

		filter_state->state_fx[0][0] = 0;
		@@ -149,17 +185,21 @@ void ivas_filters_init_fx(
		move16();
		move16();

		filter_state->state64_fx[0][0] = 0;
		filter_state->state64_fx[0][1] = 0;
		filter_state->state64_fx[0][2] = 0;
		filter_state->state64_fx[1][0] = 0;
		filter_state->state64_fx[1][1] = 0;
		filter_state->state64_fx[1][2] = 0;
		}

		#ifdef OPT_2239_IVAS_FILTER_PROCESS
		filter_state->q_diff = 0;
		filter_state->state64_fx[IVAS_FILTER_STAGE_0][0] = 0;
		filter_state->state64_fx[IVAS_FILTER_STAGE_0][1] = 0;
		filter_state->state64_fx[IVAS_FILTER_STAGE_0][2] = 0;
		filter_state->state64_fx[IVAS_FILTER_STAGE_1][0] = 0;
		filter_state->state64_fx[IVAS_FILTER_STAGE_1][1] = 0;
		filter_state->state64_fx[IVAS_FILTER_STAGE_1][2] = 0;
		move64();
		move64();
		move64();
		move64();
		move64();
		move64();
		#endif
		}

		return;
		}
		@@ -306,16 +346,16 @@ void ivas_filter_process_fixed_fx(
		ivas_filters_process_state_t *filter_state, /* i/o: filter state handle */
		Word32 *pIn_Out_fx, /* i/o: signal subject to filtering Q(q_factor) */
		const Word16 length, /* i : filter order */
		Word16 *q_factor )
		Word16 q )
		{
		SWITCH( filter_state->order )
		{
		case IVAS_FILTER_ORDER_1:
		ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q_factor );
		ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q );
		BREAK;
		case IVAS_FILTER_ORDER_4:
		ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q_factor );
		ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_1, q_factor );
		ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_0, q );
		ivas_iir_2_filter_fixed_fx( filter_state, pIn_Out_fx, length, IVAS_FILTER_STAGE_1, q );
		BREAK;
		default:
		BREAK;
		@@ -324,129 +364,43 @@ void ivas_filter_process_fixed_fx(
		return;
		}

		#if 0
		static void __check( Word64 computed_fx, Word16 computed_q, Word64 expected_fx, Word16 expected_q );

		/* Debug-only helper (compiled out by the surrounding #if 0).
		 * Compares a computed 64-bit fixed-point value (computed_fx in Q(computed_q))
		 * against a reference value (expected_fx in Q(expected_q)) and fires
		 * assert( false ) when, after alignment to a common Q, they differ by more
		 * than 2^60.
		 * NOTE(review): identifiers containing a double underscore are reserved for
		 * the implementation in C. */
		static void __check( Word64 computed_fx, Word16 computed_q, Word64 expected_fx, Word16 expected_q )
		{
		Word16 cq, eq, shift, q_diff;
		Word64 cfx, efx;

		/* Shift the computed value left by the W_norm() result and raise its Q by the
		 * same amount (presumably W_norm returns the normalizing left-shift count - confirm). */
		shift = W_norm( computed_fx );
		cfx = W_shl( computed_fx, shift );
		cq = add( computed_q, shift );

		/* Same normalization for the reference value. */
		shift = W_norm( expected_fx );
		efx = W_shl( expected_fx, shift );
		eq = add( expected_q, shift );

		/* Bring the operand with the larger Q down to the smaller Q by a right shift;
		 * a shift of 64 bits or more is replaced by the value 0. */
		q_diff = cq - eq;
		shift = abs( q_diff );
		if ( q_diff >= 0 )
		{
		cfx = shift < 64 ? W_shr( cfx, shift ) : 0;
		}
		else
		{
		efx = shift < 64 ? W_shr( efx, shift ) : 0;
		}
		/* NOTE(review): plain 64-bit subtraction; looks like it can overflow when the
		 * two normalized operands have opposite signs - confirm. */
		Word64 error = llabs( cfx - efx );
		/* Tolerance: absolute difference of at most 2^60 in the common Q. */
		if ( error > ( ( (Word64) 1 ) << 60 ) )
		{
		assert( false );
		}
		}

		/* Debug-only helper (compiled out by the surrounding #if 0).
		 * Re-expresses a 32-bit mantissa state_fx with exponent state_e, treated here
		 * as Q(31 - state_e), as a 64-bit value in Q(q) and returns it.
		 * NOTE(review): not declared static and uses a reserved double-underscore name. */
		Word64 __to_state64( Word16 q, Word32 state_fx, Word16 state_e )
		{
		/* Number of bits to shift: target Q minus source Q. */
		Word16 q_diff = q - ( 31 - state_e );
		Word64 state;
		/* A difference of -32 or lower is flushed to 0 instead of being shifted. */
		if ( q_diff <= -32 )
		state = 0;
		else
		/* presumably W_shl shifts right for a negative q_diff - confirm against the BASOP definition */
		state = W_shl( (Word64) state_fx, q_diff );
		// __check( state, q, state_fx, 31 - state_e );
		return state;
		}
		#endif

		static void ivas_iir_2_filter_fixed_fx(
		ivas_filters_process_state_t *filter_state,
		Word32 pIn_Out_fx, // Q(31-pIn_Out_e)
		const Word16 length,
		const Word16 stage,
		Word16 *q_factor )
		Word16 q )
		{
		Word16 i, j;

		Word32 *pIn_fx = pIn_Out_fx;
		Word32 *pOut_fx = pIn_Out_fx;
		Word32 tmp_pIn_buf_i_fx;

		Word32 L_tmp_prod, L_tmp;
		Word16 L_prod_e, L_tmp_e;

		Word16 q_in, q_out;
		Word16 e_in, e_out;
		q_in = *q_factor;
		*q_factor = q_out = q_in + filter_state->q_diff;

		// Word64 shift, tmp_prod, tmp, out64;
		Word64 shift, tmp_prod, tmp;
		Word32 in, out;

		FOR( i = 0; i < length; i++ )
		{
		tmp_pIn_buf_i_fx = pIn_fx[i];
		in = pIn_Out_fx[i];
		move32();
		e_in = 31 - q_in;
		move16();

		L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][0], pIn_fx[i] ); // Q31 -(pIn_Out_e[i]+ filter_state->num_e[stage][0])
		L_prod_e = add( 31 - q_in, filter_state->num_e[stage][0] );

		// shift = 31 - filter_state->num_e[stage][0] + q_in - ( q_out + 32 );
		// tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][0], pIn_fx[i] ), shift );
		// __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );
		shift = filter_state->num_q[stage][0] + q - filter_state->state64_q[stage];
		tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][0], in ), shift );

		e_out = 31 - q_in;
		pOut_fx[i] = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][0], filter_state->state_e[stage][0], L_tmp_prod, L_prod_e, &e_out ); // Q31 - pIn_Out_e[i]
		shift = sub( filter_state->state64_q[stage], q );
		pIn_Out_fx[i] = out = W_shr( W_add( filter_state->state64_fx[stage][0], tmp_prod ), shift );
		move32();

		// out64 = W_add( filter_state->state64_fx[stage][0], tmp_prod );
		// __check( out64, q_out + 32, pOut_fx[i], 31 - e_out );
		// pOut_fx[i] = (Word32)(out64 >> 32);

		FOR( j = 1; j < filter_state->filt_len; j++ )
		{
		L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][j], tmp_pIn_buf_i_fx ); // Q31-L_prod_e
		L_prod_e = add( filter_state->num_e[stage][j], e_in );

		// shift = 31 - filter_state->num_e[stage][j] + q_in - ( q_out + 32 );
		// tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][j], tmp_pIn_buf_i_fx ), shift );
		// __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );

		L_tmp = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][j], filter_state->state_e[stage][j], L_tmp_prod, L_prod_e, &L_tmp_e ); // Q31 - L_tmp_e

		// tmp = W_add( filter_state->state64_fx[stage][j], tmp_prod );
		// __check( tmp, q_out + 32, L_tmp, 31 - L_tmp_e );
		shift = filter_state->num_q[stage][j] + q - filter_state->state64_q[stage];
		tmp_prod = W_shr( W_mult0_32_32( filter_state->num_fx[stage][j], in ), shift );

		L_tmp_prod = Mpy_32_32( filter_state->den_fx[stage][j], pOut_fx[i] ); // Q31 - ( pIn_Out_e[i]+filter_state->den_e[stage][j] )
		L_prod_e = add( e_out, filter_state->den_e[stage][j] );
		tmp = W_add( filter_state->state64_fx[stage][j], tmp_prod );

		// shift = 31 - filter_state->den_e[stage][j] + q_out - ( q_out + 32 );
		// tmp_prod = W_shr( W_mult0_32_32( filter_state->den_fx[stage][j], pOut_fx[i] ), shift );
		// __check( tmp_prod, q_out + 32, L_tmp_prod, 31 - L_prod_e );
		shift = filter_state->den_q[stage][j] + q - filter_state->state64_q[stage];
		tmp_prod = W_shr( W_mult0_32_32( filter_state->den_fx[stage][j], out ), shift );

		filter_state->state_fx[stage][j - 1] = BASOP_Util_Add_Mant32Exp( L_tmp, L_tmp_e, L_negate( L_tmp_prod ), L_prod_e, &filter_state->state_e[stage][j - 1] ); // Q31 - filter_state->state_e[stage][j - 1]
		filter_state->state64_fx[stage][j - 1] = W_add( tmp, W_neg( tmp_prod ) );
		move32();

		// filter_state->state64_fx[stage][j - 1] = W_add( tmp, tmp_prod );
		// __check( filter_state->state64_fx[stage][j - 1], q_out + 32, filter_state->state_fx[stage][j - 1], 31 - filter_state->state_e[stage][j - 1] );

		// filter_state->state64_fx[stage][j - 1] = __to_state64( q_out + 32, filter_state->state_fx[stage][j - 1], filter_state->state_e[stage][j - 1] );
		}

		pOut_fx[i] = L_shr_r_sat( pOut_fx[i], sub( sub( 31, q_out ), e_out ) ); // Q(31-pIn_Out_e[i]) -> Q(q_factor)
		move32();
		}

		return;

lib_com/ivas_prot_fx.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -3821,7 +3821,7 @@ void ivas_filter_process_fixed_fx(
		ivas_filters_process_state_t *filter_state, /* i/o: filter state handle */
		Word32 *pIn_Out_fx, /* i : signal subject to filtering */
		const Word16 length, /* i : filter order */
		Word16 *q_factor
		Word16 q
		);

		ivas_error ivas_osba_enc_open_fx(

lib_com/ivas_stat_com.h

+4 −2

Original line number	Diff line number	Diff line
		@@ -662,8 +662,10 @@ typedef struct ivas_filters_process_state_t
		Word16 state_e[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];

		#ifdef OPT_2239_IVAS_FILTER_PROCESS
		Word16 q_diff; // q_diff = q_out - q_in -> q_out = q_diff + q_in
		Word32 state64_fx[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
		Word64 state64_fx[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
		Word16 num_q[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
		Word16 den_q[IVAS_FILTER_MAX_STAGES][IVAS_BIQUAD_FILT_LEN];
		Word16 state64_q[IVAS_FILTER_MAX_STAGES];
		#endif

		} ivas_filters_process_state_t;

lib_com/ivas_transient_det_fx.c

+75 −19

Original line number	Diff line number	Diff line
		@@ -375,7 +375,6 @@ static Word32 ivas_calc_duck_gain_fx(
		* Calculate in/out ducking gains
		 *-----------------------------------------------------------------------------------------*/

		#ifndef OPT_2239_IVAS_FILTER_PROCESS
		static void check( Word32 computed_fx, Word16 computed_q, Word32 expected_fx, Word16 expected_q );

		static void check( Word32 computed_fx, Word16 computed_q, Word32 expected_fx, Word16 expected_q )
		@@ -393,12 +392,11 @@ static void check( Word32 computed_fx, Word16 computed_q, Word32 expected_fx, Wo
		efx = L_shr( expected_fx, -q_diff );
		}
		Word32 error = abs( cfx - efx );
		if ( error >= ( 1 << 22 ) )
		if ( error > ( 1 << 16 ) )
		{
		assert( false );
		}
		}
		#endif

		void ivas_td_decorr_get_ducking_gains_fx(
		ivas_trans_det_state_t *hTranDet, /* i/o: Transient detector handle */
		@@ -424,11 +422,11 @@ void ivas_td_decorr_get_ducking_gains_fx(
		Copy32( pIn_pcm, e_fast_fx, frame_len ); /*Q11*/


		#ifdef OPT_2239_IVAS_FILTER_PROCESS
		#if 0 // def OPT_2239_IVAS_FILTER_PROCESS
		/* env hpf */
		Word16 q_fast = Q11;

		ivas_filter_process_fixed_fx( &hTranDet->env_hpf, e_fast_fx, frame_len, &q_fast );
		ivas_filter_process_fixed_fx( &hTranDet->env_hpf, e_fast_fx, frame_len, q_fast );

		Word32 env_eps_fx = IVAS_TDET_PARM_ENV_EPS_fx;
		Word16 env_eps_q = Q31;
		@@ -436,9 +434,9 @@ void ivas_td_decorr_get_ducking_gains_fx(
		Word16 q_diff = sub( q_fast, env_eps_q );
		q_fast = s_min( q_fast, env_eps_q );

		if ( q_diff >= 0 )
		IF( q_diff >= 0 )
		{
		for ( i = 0; i < frame_len; i++ )
		FOR( i = 0; i < frame_len; i++ )
		{
		e_fast_fx[i] = L_add( L_abs( L_shr( e_fast_fx[i], q_diff ) ), env_eps_fx );
		move32();
		@@ -446,10 +444,10 @@ void ivas_td_decorr_get_ducking_gains_fx(
		move32();
		}
		}
		else
		ELSE
		{
		env_eps_fx = L_shl( env_eps_fx, q_diff );
		for ( i = 0; i < frame_len; i++ )
		FOR( i = 0; i < frame_len; i++ )
		{
		e_fast_fx[i] = L_add( L_abs( e_fast_fx[i] ), env_eps_fx );
		move32();
		@@ -461,10 +459,10 @@ void ivas_td_decorr_get_ducking_gains_fx(
		Word16 q_slow = q_fast;

		/* env fast*/
		ivas_filter_process_fixed_fx( &hTranDet->env_fast, e_fast_fx, frame_len, &q_fast );
		ivas_filter_process_fixed_fx( &hTranDet->env_fast, e_fast_fx, frame_len, q_fast );

		/* env slow */
		ivas_filter_process_fixed_fx( &hTranDet->env_slow, e_slow_fx, frame_len, &q_slow );
		ivas_filter_process_fixed_fx( &hTranDet->env_slow, e_slow_fx, frame_len, q_slow );


		IF( tdet_flag )
		@@ -503,15 +501,17 @@ void ivas_td_decorr_get_ducking_gains_fx(
		ivas_filter_process_exp_fx( &hTranDet->env_hpf, e_fast_fx, frame_len, e_fast_e );


		// Word32 test_e_fast_fx[L_FRAME48k];
		// Word16 q_fast = Q11;
		// Copy32( pIn_pcm, test_e_fast_fx, frame_len ); /*Q11*/
		// ivas_filter_process_fixed_fx( &hTranDet->test_env_hpf, test_e_fast_fx, frame_len, &q_fast );
		// ====================================================================================================
		Word32 test_e_fast_fx[L_FRAME48k];
		Word16 q_fast = Q11;
		Copy32( pIn_pcm, test_e_fast_fx, frame_len ); /*Q11*/
		ivas_filter_process_fixed_fx( &hTranDet->test_env_hpf, test_e_fast_fx, frame_len, q_fast );

		// for ( i = 0; i < frame_len; i++ )
		// {
		// check(test_e_fast_fx[i],q_fast,e_fast_fx[i],31-e_fast_e[i]);
		// }
		for ( i = 0; i < frame_len; i++ )
		{
		check( test_e_fast_fx[i], q_fast, e_fast_fx[i], 31 - e_fast_e[i] );
		}
		// ====================================================================================================


		FOR( i = 0; i < frame_len; i++ )
		@@ -525,12 +525,68 @@ void ivas_td_decorr_get_ducking_gains_fx(
		move16();
		}

		// ====================================================================================================
		Word32 test_e_slow_fx[L_FRAME48k];
		Word32 env_eps_fx = IVAS_TDET_PARM_ENV_EPS_fx;
		Word16 env_eps_q = Q31;

		Word16 q_diff = sub( q_fast, env_eps_q );
		q_fast = s_min( q_fast, env_eps_q );

		if ( q_diff >= 0 )
		{
		for ( i = 0; i < frame_len; i++ )
		{
		test_e_fast_fx[i] = L_add( L_abs( L_shr( test_e_fast_fx[i], q_diff ) ), env_eps_fx );
		move32();
		test_e_slow_fx[i] = test_e_fast_fx[i];
		move32();
		}
		}
		else
		{
		env_eps_fx = L_shl( env_eps_fx, q_diff );
		for ( i = 0; i < frame_len; i++ )
		{
		test_e_fast_fx[i] = L_add( L_abs( test_e_fast_fx[i] ), env_eps_fx );
		move32();
		test_e_slow_fx[i] = test_e_fast_fx[i];
		move32();
		}
		}

		Word16 q_slow = q_fast;

		for ( i = 0; i < frame_len; i++ )
		{
		check( test_e_fast_fx[i], q_fast, e_fast_fx[i], 31 - e_fast_e[i] );
		}
		// ====================================================================================================

		/* env fast*/
		ivas_filter_process_exp_fx( &hTranDet->env_fast, e_fast_fx, frame_len, e_fast_e );

		// ====================================================================================================
		ivas_filter_process_fixed_fx( &hTranDet->test_env_fast, test_e_fast_fx, frame_len, q_fast );

		for ( i = 0; i < frame_len; i++ )
		{
		check( test_e_fast_fx[i], q_fast, e_fast_fx[i], 31 - e_fast_e[i] );
		}
		// ====================================================================================================

		/* env slow */
		ivas_filter_process_exp_fx( &hTranDet->env_slow, e_slow_fx, frame_len, e_slow_e );

		// ====================================================================================================
		ivas_filter_process_fixed_fx( &hTranDet->test_env_slow, test_e_slow_fx, frame_len, q_slow );

		for ( i = 0; i < frame_len; i++ )
		{
		check( test_e_slow_fx[i], q_slow, e_slow_fx[i], 31 - e_slow_e[i] );
		}
		// ====================================================================================================

		IF( tdet_flag )
		{
		FOR( i = 0; i < frame_len; i++ )