bring DCT_FX to FP (73a85a89) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_com/ivas_rom_com.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -2819,7 +2819,7 @@ const Word32 dct12_fx[12 * 12] = { // Q31
		619978560, -335436960, 0, 335436960, -619978560, 810030848, -876602816, 810030848, -619978560, 335436960, 619978560, -869301376, 846752832,
		-810030848, 759350208, -695569984, 619978560, -533649696, 438301408, -335436960, 226989024, -114460880
		};
		#endif
		#else
		const float dct4[4*4] =
		{
		0.5000f, 0.6533f, 0.5000f, 0.2706f,
		@@ -2864,7 +2864,7 @@ const float dct12[12*12]=
		0.2887f, -0.3772f, 0.2887f, -0.1562f, -0.0000f, 0.1562f, -0.2887f, 0.3772f, -0.4082f, 0.3772f, -0.2887f, 0.1562f,
		0.2887f, -0.4048f, 0.3943f, -0.3772f, 0.3536f, -0.3239f, 0.2887f, -0.2485f, 0.2041f, -0.1562f, 0.1057f, -0.0533f
		};

		#endif
		/*----------------------------------------------------------------------------------*
		 * ISM ROM tables
		 *----------------------------------------------------------------------------------*/

lib_com/ivas_rom_com.h

+2 −2

Original line number	Diff line number	Diff line
		@@ -330,12 +330,12 @@ extern const Word32 dct4_fx[];
		extern const Word32 dct5_fx[];
		extern const Word32 dct8_fx[];
		extern const Word32 dct12_fx[];
		#endif
		#else
		extern const float dct4[];
		extern const float dct5[];
		extern const float dct8[];
		extern const float dct12[];

		#endif
		/*----------------------------------------------------------------------------------*
		 * ISM ROM tables
		 *----------------------------------------------------------------------------------*/

lib_com/ivas_tools.c

+42 −36

Original line number	Diff line number	Diff line
		@@ -1312,7 +1312,7 @@ Word16 matrix_product_fx(
		{
		x_idx = k + i * rowsX; /*Q0*/
		y_idx = k + j * rowsY; /*Q0*/
		( *Zp_fx ) = *Zp_fx + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
		( *Zp_fx ) = *Zp_fx + ( X_fx[x_idx] * Y_fx[y_idx] ); /*Qx + Qy - 31*/
		}
		Zp_fx++;
		}
		@@ -1333,7 +1333,7 @@ Word16 matrix_product_fx(
		{
		x_idx = i + k * rowsX; /*Q0*/
		y_idx = j + k * rowsY; /*Q0*/
		( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
		( *Zp_fx ) = ( *Zp_fx ) + ( X_fx[x_idx] * Y_fx[y_idx] ); /*Qx + Qy - 31*/
		}
		Zp_fx++;
		}
		@@ -1350,12 +1350,13 @@ Word16 matrix_product_fx(
		for ( i = 0; i < colsX; ++i )
		{
		( *Zp_fx ) = 0;

		move32();
		for ( k = 0; k < colsX; ++k )
		{
		x_idx = k + i * rowsX; /*Q0*/
		y_idx = j + k * rowsY; /*Q0*/
		( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
		x_idx = add( k, imult1616( i, rowsX ) ); /*Q0*/
		y_idx = add( j, imult1616( k, rowsY ) ); /*Q0*/
		( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); /*Qx + Qy - 31*/
		move32();
		}

		Zp_fx++;
		@@ -1364,7 +1365,7 @@ Word16 matrix_product_fx(
		}
		else /* Regular case */
		{
		if ( colsX != rowsY )
		if ( NE_16( colsX, rowsY ) )
		{
		return EXIT_FAILURE;
		}
		@@ -1377,10 +1378,10 @@ Word16 matrix_product_fx(

		for ( k = 0; k < colsX; ++k )
		{
		x_idx = i + k * rowsX; /*Q0*/
		y_idx = k + j * rowsY; /*Q0*/
		( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31 L_sat_add() */
		/* TODO: overflow of Z_fx to be checked */
		x_idx = add( i, imult1616( k, rowsX ) ); /*Q0*/
		y_idx = add( k, imult1616( j, rowsY ) ); /*Q0*/
		( *Zp_fx ) = L_add_sat( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); /*Qx + Qy - 31*/
		// TODO: overflow of Z_fx to be checked
		move32();
		}
		Zp_fx++;
		@@ -1406,7 +1407,7 @@ Word16 matrix_product_q30_fx(
		Word16 i, j, k;
		Word16 x_idx, y_idx;
		Word32 *Zp_fx = Z_fx;
		int64_t W_tmp;
		long long int W_tmp;

		/* Processing */
		test();
		@@ -1422,15 +1423,16 @@ Word16 matrix_product_q30_fx(
		{
		for ( i = 0; i < colsX; ++i )
		{
		//( *Zp_fx ) = 0;
		W_tmp = 0;
		for ( k = 0; k < rowsX; ++k )
		{
		/* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); */
		//( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) );
		x_idx = k + i * rowsX; /* Q0 */
		y_idx = k + j * rowsY; /* Q0 */
		W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */
		W_tmp += X_fx[x_idx] * Y_fx[y_idx]; /* Q56 */
		}
		W_tmp = W_tmp * 64; /* W_shl( W_tmp, 6 ); */ /*Q62*/
		W_tmp = W_tmp << 6; /* W_shl( W_tmp, 6 ); */ /*Q62*/
		( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /* W_round64_L( W_tmp ); */ /*Q30*/
		Zp_fx++;
		}
		@@ -1446,15 +1448,17 @@ Word16 matrix_product_q30_fx(
		{
		for ( i = 0; i < rowsX; ++i )
		{
		//( *Zp_fx ) = 0;
		W_tmp = 0;

		for ( k = 0; k < colsX; ++k )
		{
		/* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); */
		//( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) );
		x_idx = i + k * rowsX; /*Q0*/
		y_idx = j + k * rowsY; /*Q0*/
		W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */
		W_tmp += X_fx[x_idx] * Y_fx[y_idx]; /* Q56 */
		}
		W_tmp = W_tmp * 64; /*Q62*/
		W_tmp = W_tmp << 6; /*Q62*/
		( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/
		Zp_fx++;
		}
		@@ -1470,15 +1474,16 @@ Word16 matrix_product_q30_fx(
		{
		for ( i = 0; i < colsX; ++i )
		{
		//( *Zp_fx ) = 0;
		W_tmp = 0;
		for ( k = 0; k < colsX; ++k )
		{
		/* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); */
		//( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) );
		x_idx = k + i * rowsX; /*Q0*/
		y_idx = j + k * rowsY; /*Q0*/
		W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/
		W_tmp += X_fx[x_idx] * Y_fx[y_idx]; // Q56
		}
		W_tmp = W_tmp * 64; /*Q62*/
		W_tmp = W_tmp << 6; /*Q62*/
		( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/

		Zp_fx++;
		@@ -1496,16 +1501,17 @@ Word16 matrix_product_q30_fx(
		{
		for ( i = 0; i < rowsX; ++i )
		{
		//( *Zp_fx ) = 0;
		W_tmp = 0;

		for ( k = 0; k < colsX; ++k )
		{
		/* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); */
		//( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) );
		x_idx = i + k * rowsX; /*Q0*/
		y_idx = k + j * rowsY; /*Q0*/
		W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/
		W_tmp += X_fx[x_idx] * Y_fx[y_idx]; // Q56
		}
		W_tmp = W_tmp * 64; /*Q62*/
		W_tmp = W_tmp << 6; /*Q62*/
		( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/

		Zp_fx++;

lib_dec/ivas_qmetadata_dec.c

+27 −40

Original line number	Diff line number	Diff line
		@@ -4348,16 +4348,16 @@ static void read_stream_dct_coeffs_omasa(

		/* deindex */
		#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
		q_dct_data_fx[0] = (Word32) ( ( step_fx * q_idx[0] ) >> 6 ); /* Q25 */
		for ( i = 1; i < len_stream; i++ )
		q_dct_data_fx[0] = L_shl( Mpy_32_16_1( step, shl( q_idx[0], 7 ) ), 2 ); // q = 25
		FOR( i = 1; i < len_stream; i++ )
		{
		if ( ( q_idx[i] & 1 ) == 0 )
		IF( s_and( q_idx[i], 1 ) == 0 )
		{
		q_dct_data_fx[i] = (Word32) ( ( step_fx * ( -q_idx[i] ) ) >> 7 ); /* Q25 */
		q_dct_data_fx[i] = L_shl( Mpy_32_16_1( step, negate( shl( q_idx[i], 6 ) ) ), 2 ); /*Q25*/
		}
		ELSE
		{
		q_dct_data_fx[i] = (Word32) ( ( step_fx * ( q_idx[i] + 1 ) ) >> 7 ); /* Q25 */
		q_dct_data_fx[i] = L_shl( Mpy_32_16_1( step, shl( q_idx[i] + 1, 6 ) ), 2 ); /*Q25*/
		}
		}
		#else
		@@ -4430,19 +4430,19 @@ void ivas_omasa_decode_masa_to_total(
		{
		case 4:
		matrix_product_q30_fx( dct4_fx, nblocks, nblocks, 1, q_dct_data_fx, nblocks, 1, 0, dct_data_tmp_fx );
		mvl2l( dct_data_tmp_fx, q_dct_data_fx, nblocks ); /*Q30*/
		Copy32( dct_data_tmp_fx, q_dct_data_fx, nblocks ); /*Q30*/
		BREAK;
		case 5:
		matrix_product_q30_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
		mvl2l( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
		Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
		BREAK;
		case 8:
		matrix_product_q30_fx( dct8_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
		mvl2l( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
		Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
		BREAK;
		case 12:
		matrix_product_q30_fx( dct12_fx, nbands, nbands, 1, q_dct_data_fx, nbands, 1, 0, dct_data_tmp_fx );
		mvl2l( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
		Copy32( dct_data_tmp_fx, q_dct_data_fx, nbands ); /*Q30*/
		BREAK;
		case 20:
		matrix_product_fx( dct5_fx, nbands, nbands, 1, q_dct_data_fx, nbands, nblocks, 0, dct_data_tmp_fx );
		@@ -4456,49 +4456,40 @@ void ivas_omasa_decode_masa_to_total(
		printf( "Incorrect number of coefficients for OMASA.\n" );
		BREAK;
		}

		/* this is to make sure the comparison to the threshold 0.98 will go the same way in */
		/* fixed point and floating point without having to drag the fixed point values to the */
		/* comparison place in the code; 1052266987 is 0.98 in Q30 it is not needed in the fixed point*/

		for ( i = 0; i < nblocks * nbands; i++ )
		{
		if ( q_dct_data_fx[i] >= 1052266987 && q_dct_data_fx[i] < 1052400000 )
		{
		q_dct_data_fx[i] = 1052400000;
		}
		}

		k = 0;
		for ( i = 0; i < nblocks; i++ )
		move16();
		FOR( i = 0; i < nblocks; i++ )
		{
		for ( j = 0; j < nbands; j++ )
		FOR( j = 0; j < nbands; j++ )
		{
		masa_to_total_energy_ratio[i][j] = q_dct_data_fx[k] / (float) ( 1 << 30 );
		masa_to_total_energy_ratio[i][j] = max( 0.0f, masa_to_total_energy_ratio[i][j] );
		masa_to_total_energy_ratio[i][j] = min( 1.0f, masa_to_total_energy_ratio[i][j] );
		k++;
		masa_to_total_energy_ratio_fx[i][j] = L_max( 0, q_dct_data_fx[k] ); // Q30
		move32();
		masa_to_total_energy_ratio_fx[i][j] = L_min( ONE_IN_Q30, masa_to_total_energy_ratio_fx[i][j] ); /*Q30*/
		move32();
		k = add( k, 1 );
		}
		}

		if ( nblocks == 1 )
		IF( EQ_16( nblocks, 1 ) )
		{
		for ( i = 1; i < MAX_PARAM_SPATIAL_SUBFRAMES; i++ )
		FOR( i = 1; i < MAX_PARAM_SPATIAL_SUBFRAMES; i++ )
		{
		for ( j = 0; j < nbands; j++ )
		FOR( j = 0; j < nbands; j++ )
		{
		masa_to_total_energy_ratio[i][j] = masa_to_total_energy_ratio[0][j];
		masa_to_total_energy_ratio_fx[i][j] = masa_to_total_energy_ratio_fx[0][j]; /*Q30*/
		move32();
		}
		}
		}

		if ( nbands == 1 )
		IF( EQ_16( nbands, 1 ) )
		{
		for ( j = 1; j < 5; j++ )
		FOR( j = 1; j < 5; j++ )
		{
		for ( i = 0; i < nblocks; i++ )
		FOR( i = 0; i < nblocks; i++ )
		{
		masa_to_total_energy_ratio[i][j] = masa_to_total_energy_ratio[i][0];
		masa_to_total_energy_ratio_fx[i][j] = masa_to_total_energy_ratio_fx[i][0]; /*Q30*/
		move32();
		}
		}
		}
		@@ -4572,10 +4563,6 @@ void ivas_omasa_decode_masa_to_total(
		for ( i = 0; i < nblocks; i++ )
		{
		masa_to_total_energy_ratio[i][j] = masa_to_total_energy_ratio[i][0];
		#ifdef NONBE_1231_BASOP_819_THRESHOLD_MASA2TOTAL
		int_tmp = (int32_t) ( MASA_SUR_COH_PRECISION * masa_to_total_energy_ratio[i][j] );
		masa_to_total_energy_ratio[i][j] = (float) ( int_tmp * MASA_SUR_COH_THRESHOLD );
		#endif
		}
		}
		}

Admin message