Optimization changes for Encoder Decoder (56a9c797) · Commits · SA4 / Audio / IVAS BASOP

lib_com/cnst.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -1536,6 +1536,7 @@ enum
		/* NOTE: the replacement list below has no outer parentheses, so operators next to the macro at a use site bind to its parts. */
		#define SHB_OVERLAP_LEN ( L_FRAME16k - L_SHB_LAHEAD ) / ( NUM_SHB_SUBFR - 1 )
		#define QUANT_DIST_INIT ( 10000000000.0f ) /* Quantiser search distance initialisation */
		/* NOTE: expands textually to `5 / 2`. Written as `n * HIBND_ACB_L_FAC` it yields ( n * 5 ) / 2, but when it stands alone */
		/* (for example as a function argument) integer division reduces it to 2. */
		#define HIBND_ACB_L_FAC 5 / 2 /* SHB Interpolation Factor */
		/* Same factor as an integer: 5 in Q1 (i.e. 2.5). Multiply by it, then shift the product right by 1. */
		#define HIBND_ACB_L_FAC_Q1 ( 5 ) /* SHB Interpolation Factor Q1 */
		#define NUM_HILBERTS 2
		#define HILBERT_ORDER1 5
		#define HILBERT_ORDER2 4

lib_com/fft_fx.c

+21 −21

Original line number	Diff line number	Diff line
		@@ -4978,15 +4978,15 @@ static void fft_len20_fx(
		cmplx tt[4];
		cmplx y[20];

		xx[0] = CL_shr( x[0], SCALEFACTOR20 ); // Qx
		xx[0] = x[0]; // CL_shr( x[0], SCALEFACTOR20 ); // Qx
		move64();
		xx[1] = CL_shr( x[16], SCALEFACTOR20 ); // Qx
		xx[1] = x[16]; // CL_shr( x[16], SCALEFACTOR20 ); // Qx
		move64();
		xx[2] = CL_shr( x[12], SCALEFACTOR20 ); // Qx
		xx[2] = x[12]; // CL_shr( x[12], SCALEFACTOR20 ); // Qx
		move64();
		xx[3] = CL_shr( x[8], SCALEFACTOR20 ); // Qx
		xx[3] = x[8]; // CL_shr( x[8], SCALEFACTOR20 ); // Qx
		move64();
		xx[4] = CL_shr( x[4], SCALEFACTOR20 ); // Qx
		xx[4] = x[4]; // CL_shr( x[4], SCALEFACTOR20 ); // Qx
		move64();

		s[0] = CL_add( xx[1], xx[4] );
		@@ -5023,15 +5023,15 @@ static void fft_len20_fx(
		y[12] = CL_msu_j( s[2], s[3] );
		move64();

		xx[0] = CL_shr( x[5], SCALEFACTOR20 );
		xx[0] = x[5]; // CL_shr( x[5], SCALEFACTOR20 );
		move64();
		xx[1] = CL_shr( x[1], SCALEFACTOR20 );
		xx[1] = x[1]; // CL_shr( x[1], SCALEFACTOR20 );
		move64();
		xx[2] = CL_shr( x[17], SCALEFACTOR20 );
		xx[2] = x[17]; // CL_shr( x[17], SCALEFACTOR20 );
		move64();
		xx[3] = CL_shr( x[13], SCALEFACTOR20 );
		xx[3] = x[13]; // CL_shr( x[13], SCALEFACTOR20 );
		move64();
		xx[4] = CL_shr( x[9], SCALEFACTOR20 );
		xx[4] = x[9]; // CL_shr( x[9], SCALEFACTOR20 );
		move64();

		s[0] = CL_add( xx[1], xx[4] );
		@@ -5068,15 +5068,15 @@ static void fft_len20_fx(
		y[13] = CL_msu_j( s[2], s[3] );
		move64();

		xx[0] = CL_shr( x[10], SCALEFACTOR20 );
		xx[0] = x[10]; // CL_shr( x[10], SCALEFACTOR20 );
		move64();
		xx[1] = CL_shr( x[6], SCALEFACTOR20 );
		xx[1] = x[6]; // CL_shr( x[6], SCALEFACTOR20 );
		move64();
		xx[2] = CL_shr( x[2], SCALEFACTOR20 );
		xx[2] = x[2]; // CL_shr( x[2], SCALEFACTOR20 );
		move64();
		xx[3] = CL_shr( x[18], SCALEFACTOR20 );
		xx[3] = x[18]; // CL_shr( x[18], SCALEFACTOR20 );
		move64();
		xx[4] = CL_shr( x[14], SCALEFACTOR20 );
		xx[4] = x[14]; // CL_shr( x[14], SCALEFACTOR20 );
		move64();

		s[0] = CL_add( xx[1], xx[4] );
		@@ -5113,15 +5113,15 @@ static void fft_len20_fx(
		y[14] = CL_msu_j( s[2], s[3] );
		move64();

		xx[0] = CL_shr( x[15], SCALEFACTOR20 );
		xx[0] = x[15]; // CL_shr( x[15], SCALEFACTOR20 );
		move64();
		xx[1] = CL_shr( x[11], SCALEFACTOR20 );
		xx[1] = x[11]; // CL_shr( x[11], SCALEFACTOR20 );
		move64();
		xx[2] = CL_shr( x[7], SCALEFACTOR20 );
		xx[2] = x[7]; // CL_shr( x[7], SCALEFACTOR20 );
		move64();
		xx[3] = CL_shr( x[3], SCALEFACTOR20 );
		xx[3] = x[3]; // CL_shr( x[3], SCALEFACTOR20 );
		move64();
		xx[4] = CL_shr( x[19], SCALEFACTOR20 );
		xx[4] = x[19]; // CL_shr( x[19], SCALEFACTOR20 );
		move64();

		s[0] = CL_add( xx[1], xx[4] );
		@@ -7173,7 +7173,7 @@ void rfft_fx(
		move32();
		x[( length - ( i << 1 ) )] = Mpy_32_16_1( L_add( t1, t3 ), 16384 /*0.5.Q15*/ );
		move32();
		x[( ( length - ( i << 1 ) ) + 1 )] = Mpy_32_16_1( L_negate( L_add( t2, t4 ) ), 16384 /*0.5.Q15*/ );
		x[( ( length - ( i << 1 ) ) + 1 )] = Mpy_32_16_1( ( L_add( t2, t4 ) ), -16384 /*0.5.Q15*/ );
		move32();
		}

lib_com/hp50_fx.c

+29 −86

Original line number	Diff line number	Diff line
		@@ -469,8 +469,9 @@ void hp20_fx_32(
		{
		Word16 i;
		Word32 a1_fx, a2_fx, b1_fx, b2_fx;
		Word16 Qx0, Qx1, Qx2, Qy1, Qprev_y1, Qy2, Qprev_y2, Qmin;
		Word64 x0_fx64, x1_fx64, x2_fx64, y0_fx64, y1_fx64, y2_fx64, R1, R2, R3, R4, R5;
		Word16 Qy1, Qy2, Qmin;
		Word64 y0_fx64, y1_fx64, y2_fx64;
		Word32 x0, x1, x2;

		IF( EQ_32( Fs, 8000 ) )
		{
		@@ -521,20 +522,22 @@ void hp20_fx_32(
		move32();
		move32();

		Qprev_y1 = extract_l( mem_fx[4] );
		Qprev_y2 = extract_l( mem_fx[5] );
		y1_fx64 = W_deposit32_l( mem_fx[0] );
		y2_fx64 = W_deposit32_l( mem_fx[1] );
		x0_fx64 = W_deposit32_l( mem_fx[2] );
		x1_fx64 = W_deposit32_l( mem_fx[3] );
		y1_fx64 = W_add( W_deposit32_l( mem_fx[0] ), W_deposit32_h( mem_fx[1] ) );
		y2_fx64 = W_add( W_deposit32_l( mem_fx[2] ), W_deposit32_h( mem_fx[3] ) );

		x0 = mem_fx[4];
		move32();
		x1 = mem_fx[5];
		move32();

		FOR( i = 0; i < lg; i++ )
		{
		x2_fx64 = x1_fx64;
		move64();
		x1_fx64 = x0_fx64;
		move64();
		x0_fx64 = W_deposit32_l( signal_fx[i] );
		x2 = x1;
		move32();
		x1 = x0;
		move32();
		x0 = signal_fx[i];
		move32();

		Qy1 = W_norm( y1_fx64 );
		if ( y1_fx64 == 0 )
		@@ -542,9 +545,6 @@ void hp20_fx_32(
		Qy1 = 62;
		move16();
		}
		Qy1 = sub( Qy1, 34 );
		R1 = W_mult0_32_32( W_extract_l( W_shl( y1_fx64, Qy1 ) ), a1_fx );
		Qy1 = add( Qy1, Qprev_y1 );

		Qy2 = W_norm( y2_fx64 );
		if ( y2_fx64 == 0 )
		@@ -552,89 +552,32 @@ void hp20_fx_32(
		Qy2 = 62;
		move16();
		}
		Qy2 = sub( Qy2, 34 );
		R2 = W_mult0_32_32( W_extract_l( W_shl( y2_fx64, Qy2 ) ), a2_fx );
		Qy2 = add( Qy2, Qprev_y2 );

		Qx0 = W_norm( x0_fx64 );
		if ( x0_fx64 == 0 )
		{
		Qx0 = 62;
		move16();
		}
		Qx0 = sub( Qx0, 34 );
		R3 = W_mult0_32_32( W_extract_l( W_shl( x0_fx64, Qx0 ) ), b2_fx );

		Qx1 = W_norm( x1_fx64 );
		if ( x1_fx64 == 0 )
		{
		Qx1 = 62;
		move16();
		}
		Qx1 = sub( Qx1, 34 );
		R4 = W_mult0_32_32( W_extract_l( W_shl( x1_fx64, Qx1 ) ), b1_fx );

		Qx2 = W_norm( x2_fx64 );
		if ( x2_fx64 == 0 )
		{
		Qx2 = 62;
		move16();
		}
		Qx2 = sub( Qx2, 34 );
		R5 = W_mult0_32_32( W_extract_l( W_shl( x2_fx64, Qx2 ) ), b2_fx );

		Qmin = s_min( Qy1, Qy2 );

		y0_fx64 = W_add( W_shr( R1, sub( Qy1, Qmin ) ), W_shr( R2, sub( Qy2, Qmin ) ) );

		Qmin = s_min( Qmin, Qx0 );
		Qmin = s_min( Qmin, Qx1 );
		Qmin = s_min( Qmin, Qx2 );
		Qmin = sub( Qmin, 34 );

		y0_fx64 = W_add( W_shr( y0_fx64, sub( s_min( Qy1, Qy2 ), Qmin ) ), W_add( W_shr( R3, sub( Qx0, Qmin ) ), W_add( W_shr( R4, sub( Qx1, Qmin ) ), W_shr( R5, sub( Qx2, Qmin ) ) ) ) );
		y0_fx64 = W_mac_32_32( W_mult_32_32( W_shl_sat_l( y1_fx64, Qmin ), a1_fx ), W_shl_sat_l( y2_fx64, Qmin ), a2_fx ); // Qmin + Q29 + Q30 + 1

		y0_fx64 = W_shr( y0_fx64, 29 );

		signal_fx[i] = W_extract_l( W_shr( y0_fx64, Qmin ) );
		move32();
		IF( signal_fx[i] < 0 )
		{
		signal_fx[i] = L_add( signal_fx[i], 1 );
		Word64 temp = W_mac_32_32( W_mac_32_32( W_mult_32_32( x2, b2_fx ), x1, b1_fx ), x0, b2_fx ); // Q30
		Word64 y0_fx = W_shr( y0_fx64, add( Qmin, Q30 ) ); // Q30
		y0_fx64 = W_add( temp, y0_fx ); // Q30
		signal_fx[i] = W_extract_l( W_shr( y0_fx64, Q30 ) );
		move32();
		}

		y2_fx64 = y1_fx64;
		y1_fx64 = y0_fx64;
		Qprev_y2 = Qprev_y1;
		Qprev_y1 = Qmin;
		move64();
		y1_fx64 = y0_fx64;
		move64();
		move16();
		move16();
		}

		Qy1 = W_norm( y1_fx64 );
		test();
		IF( y1_fx64 != 0 && LT_16( Qy1, 32 ) )
		{
		y1_fx64 = W_shr( y1_fx64, sub( 32, Qy1 ) );
		Qprev_y1 = sub( Qprev_y1, sub( 32, Qy1 ) );
		}

		Qy2 = W_norm( y2_fx64 );
		test();
		IF( y2_fx64 != 0 && LT_16( Qy2, 32 ) )
		{
		y2_fx64 = W_shr( y2_fx64, sub( 32, Qy2 ) );
		Qprev_y2 = sub( Qprev_y2, sub( 32, Qy2 ) );
		}

		mem_fx[0] = W_extract_l( y1_fx64 );
		mem_fx[1] = W_extract_l( y2_fx64 );
		mem_fx[2] = W_extract_l( x0_fx64 );
		mem_fx[3] = W_extract_l( x1_fx64 );
		mem_fx[4] = Qprev_y1;
		mem_fx[5] = Qprev_y2;
		mem_fx[1] = W_extract_h( y1_fx64 );
		mem_fx[2] = W_extract_l( y2_fx64 );
		mem_fx[3] = W_extract_h( y2_fx64 );
		mem_fx[4] = x0;
		mem_fx[5] = x1;

		move32();
		move32();
		move32();

lib_com/options.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -156,6 +156,7 @@
		#define FIX_ISSUE_1214 /* Ittiam: Fix for issue 1214: Energy leakage in IGF tiles for MDCT-stereo @64kbps SWB*/
		#define FIX_881_HILBERT_FILTER /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */
		#define FIX_ISSUE_1245 /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/
		/* When defined, acelp_core_dec_ivas_fx() passes shr( imult1616( st->L_frame, HIBND_ACB_L_FAC_Q1 ), 1 ) to Rescale_exc() */
		/* in place of imult1616( st->L_frame, HIBND_ACB_L_FAC ). */
		#define FIX_ISSUE_1291 /* Ittiam: Wrong use of imult1616() in ACELP rescaling */
		#define FIX_920_IGF_INIT_ERROR /* FhG: issue 920: fix bitrate mismatch in initial IGF config to avoid error message in same cases */
		#define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
		#define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */

lib_dec/acelp_core_dec_ivas_fx.c

+5 −1

Original line number	Diff line number	Diff line
		@@ -748,7 +748,11 @@ ivas_error acelp_core_dec_ivas_fx(
		IF( st->hMusicPF && st->hGSCDec )
		{
		Rescale_exc( st->hMusicPF->dct_post_old_exc_fx, exc_fx, bwe_exc_fx, st->hGSCDec->last_exc_dct_in_fx, st->L_frame,
		#ifdef FIX_ISSUE_1291
		shr( imult1616( st->L_frame, HIBND_ACB_L_FAC_Q1 ), 1 ), 0, &( st->Q_exc ), st->Q_subfr, NULL, 0, INACTIVE );
		#else
		imult1616( st->L_frame, HIBND_ACB_L_FAC ), 0, &( st->Q_exc ), st->Q_subfr, NULL, 0, INACTIVE );
		#endif
		}
		IF( st->hPFstat != NULL )
		{
		@@ -799,7 +803,7 @@ ivas_error acelp_core_dec_ivas_fx(
		Copy( syn1_fx + st->L_frame - L_SYN_MEM_CLAS_ESTIM, st->mem_syn_clas_estim_fx, L_SYN_MEM_CLAS_ESTIM );

		/* save and delay synthesis to be used by SWB BWE */
		Copy_Scale_sig( syn1_fx, temp_buf_fx, st->L_frame, sub( -1, st->Q_syn ) ); // Q_syn
		Copy_Scale_sig( syn1_fx, temp_buf_fx, st->L_frame, sub( -1, st->Q_syn ) ); // Q_syn -> Q(-1)
		IF( st->hBWE_FD != NULL )
		{
		#ifdef FIX_ISSUE_1290