Merge with rtp-updates-sa4-134 (a38b4bfe) · Commits · IVAS Codec Public Collaboration / IVAS Codec

lib_util/ivas_rtp_pi_data.c

+11 −13

Original line number	Diff line number	Diff line
		@@ -874,18 +874,14 @@ static ivas_error unpackPositionCompact( const uint8_t *buffer, uint32_t numData
		return IVAS_ERROR( IVAS_ERR_RTP_UNPACK_PI_DATA, "Incorrect size to unpack compact position PI data" );
		}

		// int16_t value1 = -100;
		// int16_t value2 = 100;
		// uint16_t value1_uint16 = (uint16_t) value1;
		// uint16_t value2_uint16 = (uint16_t) value2;


		position->size = sizeof( IVAS_PIDATA_POSITION );

		compactRead = ( (int32_t) buffer[0] << 24 ) \| ( (int32_t) buffer[1] << 16 ) \| ( (int32_t) buffer[2] << 8 ) \| ( (int32_t) buffer[3] );
		position->position.x = FLOAT_FROM_Q10( (int16_t) ( compactRead >> 21 ) ) * MAX_PI_COMPACT_POSITION_XY_METERS;
		position->position.y = FLOAT_FROM_Q10( ( (int16_t) ( compactRead >> 10 ) ) & MASK_11BIT ) * MAX_PI_COMPACT_POSITION_XY_METERS;
		position->position.z = FLOAT_FROM_Q9( (int16_t) ( compactRead & MASK_10BIT ) ) * MAX_PI_COMPACT_POSITION_Z_METERS;
		position->position.x = FLOAT_FROM_Q10( (int16_t) ( compactRead >> 21 ) ) * MAX_PI_COMPACT_POSITION_XY_METERS; /* Shift preserves sign bit */
		compactRead = compactRead << 11; /* Discard read bits */
		position->position.y = FLOAT_FROM_Q10( (int16_t) ( compactRead >> 21 ) ) * MAX_PI_COMPACT_POSITION_XY_METERS; /* Shift preserves sign bit */
		compactRead = compactRead << 11; /* Discard read bits */
		position->position.z = FLOAT_FROM_Q9( (int16_t) ( compactRead >> 22 ) ) * MAX_PI_COMPACT_POSITION_Z_METERS; /* Shift preserves sign bit */

		return IVAS_ERR_OK;
		}
		@@ -1011,10 +1007,12 @@ static ivas_error unpackISMPositionCompact( const uint8_t *buffer, uint32_t numD

		for ( n = 0; n < ism_position->numObjects; n++ )
		{
		compactRead = ( (int32_t) buffer[0] << 24 ) \| ( (int32_t) buffer[1] << 16 ) \| ( (int32_t) buffer[2] << 8 ) \| ( (int32_t) buffer[3] );
		ism_position->position[n].x = FLOAT_FROM_Q10( (int16_t) ( compactRead >> 21 ) ) * MAX_PI_COMPACT_POSITION_XY_METERS;
		ism_position->position[n].y = FLOAT_FROM_Q10( ( (int16_t) ( compactRead >> 10 ) ) & MASK_11BIT ) * MAX_PI_COMPACT_POSITION_XY_METERS;
		ism_position->position[n].z = FLOAT_FROM_Q9( (int16_t) ( compactRead & MASK_10BIT ) ) * MAX_PI_COMPACT_POSITION_Z_METERS;
		compactRead = ( (int32_t) buffer[n * 4] << 24 ) \| ( (int32_t) buffer[n * 4 + 1] << 16 ) \| ( (int32_t) buffer[n * 4 + 2] << 8 ) \| ( (int32_t) buffer[n * 4 + 3] );
		ism_position->position[n].x = FLOAT_FROM_Q10( (int16_t) ( compactRead >> 21 ) ) * MAX_PI_COMPACT_POSITION_XY_METERS; /* Shift preserves sign bit */
		compactRead = compactRead << 11; /* Discard read bits */
		ism_position->position[n].y = FLOAT_FROM_Q10( (int16_t) ( compactRead >> 21 ) ) * MAX_PI_COMPACT_POSITION_XY_METERS; /* Shift preserves sign bit */
		compactRead = compactRead << 11; /* Discard read bits */
		ism_position->position[n].z = FLOAT_FROM_Q9( (int16_t) ( compactRead >> 22 ) ) * MAX_PI_COMPACT_POSITION_Z_METERS; /* Shift preserves sign bit */
		}
		return IVAS_ERR_OK;
		}

readme.txt

+65 −63

Original line number	Diff line number	Diff line
		@@ -38,6 +38,7 @@ in ISO/IEC C99. The system is implemented as six separate programs:
		IVAS_cod IVAS Encoder
		IVAS_dec IVAS Decoder
		IVAS_rend IVAS External Renderer
		ISAR_post_rend ISAR Post Renderer
		IVAS_cod_fmtsw IVAS Encoder with support for format switching
		ambi_converter example program for Ambisonics format conversion

		@@ -135,6 +136,7 @@ should have the following structure:
		\|-- lib_lc3plus
		\|-- lib_rend
		\|-- lib_util
		\|-- scripts

		The package includes a Makefile for gcc, which has been verified on
		32-bit Linux systems. The code can be compiled by entering the directory
		@@ -322,14 +324,13 @@ Options:
		left or l or 90->left, right or r or -90->right, center or c or 0->middle
		-exof File : External orientation trajectory File for simulation of external orientations
		-dpid ID : Directivity pattern ID(s) (space-separated list of up to 4 numbers can be
		specified) for binaural output configuration
		-aeid ID \| File : Acoustic environment ID (number > 0) or
		alternatively, it can be a text file where each line contains "ID duration"
		for BINAURAL_ROOM_REVERB output configuration.
		specified) for binaural output configurations
		-aeid ID \| File : Acoustic environment ID (number > 0) or a text file where each line
		contains "ID duration" for BINAURAL_ROOM_REVERB output configuration
		-obj_edit File : Object editing instructions file or NULL for built-in example
		-level level : Complexity level, level = (1, 2, 3), will be defined after characterisation.
		-om File : Coded metadata File for BINAURAL_SPLIT_PCM OutputConf
		Currently, all values default to level 3 (full functionality).
		-level level : Complexity level, level = (1, 2, 3), will be defined after characterisation
		Currently, all values default to level 3 (full functionality)
		-om File : Coded metadata File for BINAURAL_SPLIT_PCM output configuration
		-q : Quiet mode, limit printouts to terminal, default is deactivated


		@@ -361,11 +362,11 @@ Options:
		left or l or 90->left, right or r or -90->right, center or c or 0 ->middle
		-exof File : External orientation trajectory File for simulation of external orientations
		-dpid ID : Directivity pattern ID(s) (space-separated list of up to 4 numbers can be
		specified) for binaural outputs
		-aeid ID \| File : Acoustic environment ID (number > 0)
		alternatively, it can be a text file where each line contains "ID duration" for BINAURAL_ROOM_REVERB output.
		specified) for binaural output configurations
		-aeid ID \| File : Acoustic environment ID (number > 0) or a text file where each line
		contains "ID duration" for BINAURAL_ROOM_REVERB output configuration
		-lp Position : Output LFE position. Comma-delimited triplet of [gain, azimuth, elevation] where gain is linear
		(like --gain, -g) and azimuth, elevation are in degrees.
		(like --gain, -g) and azimuth, elevation are in degrees
		If specified, overrides the default behavior which attempts to map input to output LFE channel(s)
		-lm File : LFE panning matrix File (CSV table) containing a matrix of dimensions
		[ num_input_lfe x num_output_channels ] with elements specifying linear routing gain (like --gain, -g).
		@@ -374,15 +375,14 @@ Options:
		-g : Input gain (linear, not in dB) to be applied to input audio file
		-l : List supported audio formats
		-smd : Metadata Synchronization Delay in ms, Default is 0. Quantized by 5ms subframes.
		-om File : Coded metadata File (only for BINAURAL_SPLIT_PCM output)
		-prbfi File : BFI File (only for BINAURAL_SPLIT_PCM output)
		-level level : Complexity level, level = (1, 2, 3), will be defined after characterisation.
		-om File : Coded metadata File for BINAURAL_SPLIT_PCM output configuration
		-level level : Complexity level, level = (1, 2, 3), will be defined after characterisation
		Currently, all values default to level 3 (full functionality).
		-q : Quiet mode, limit printouts to terminal, default is deactivated


		The usage of the "ISAR_post_rend" program as follows:
		-----------------------------------------------------
		The usage of the "ISAR_post_rend" program is as follows:
		--------------------------------------------------------

		Usage: ISAR_post_rend [options]

		@@ -396,6 +396,34 @@ Options:
		-prbfi File : BFI File


		The usage of the "ambi_converter" program is as follows:
		--------------------------------------------------------

		Usage: ambi_converter input_file output_file input_convention output_convention

		input_convention and output_convention must each be an integer number in [0,5]
		the following conventions are supported:
		0 : ACN-SN3D
		1 : ACN-N3D
		2 : FuMa-MaxN
		3 : FuMa-FuMa
		4 : SID-SN3D
		5 : SID-N3D

		Either the input or the output convention must always be ACN-SN3D.


		The usage of the "IVAS_cod_fmtsw" program is as follows:
		--------------------------------------------------------

		Usage: IVAS_cod_fmtsw format_switching_file

		Mandatory parameters:
		---------------------
		format_switching_file: Text file containing a valid encoder command line in each line



		MULTICHANNEL LOUDSPEAKER INPUT / OUTPUT CONFIGURATIONS
		======================================================
		The loudspeaker positions for each MC layout are assumed to have the following azimuth and elevation
		@@ -423,31 +451,6 @@ omitted, the LFE input is downmixed to all channels with a factor of 1/N. Positi
		the LFE channel. Maximum number of supported loudspeakers N is 16.
		An example custom loudspeaker layout file is available: ls_setup_16ch_8+4+4.txt

		The usage of the "ambi_converter" program as follows:
		-----------------------------------------------------

		Usage: ambi_converter input_file output_file input_convention output_convention

		input_convention and output convention must be an integer number in [0,5]
		the following conventions are supported:
		0 : ACN-SN3D
		1 : ACN-N3D
		2 : FuMa-MaxN
		3 : FuMa-FuMa
		4 : SID-SN3D
		5 : SID-N3D

		Either the input or the output convention must always be ACN-SN3D.

		The usage of the "IVAS_cod_fmtsw" program is as follows:
		--------------------------------------------------------

		Usage: IVAS_cod_fmtsw format_switching_file

		Mandatory parameters:
		---------------------
		format_switching_file: Text file containing a valid encoder command line in each line


		RUNNING THE SELF TEST
		=====================
		@@ -720,4 +723,3 @@ typedef struct {
		u_int32 length; /* size of the RTP packet in bytes */
		(u_int8 * length) RTP_packet; /* RTP packet (sized length * byte) */
		} RTP_streaming_packet;

tests/rtp/ivasrtp.py

+11 −11

Original line number	Diff line number	Diff line
		@@ -847,17 +847,17 @@ def packPositionsCompact(bitstrm: BitStream, data: any):
		#posX = np.uint16(q10(position.x / 10.24))
		#posY = np.uint16(q10(position.y / 10.24))
		#posZ = np.uint16(q9(position.z / 5.12))
		posX = ctypes.c_int16( q10(position.x / 10.24) ).value
		posY = ctypes.c_int16( q10(position.y / 10.24) ).value
		posZ = ctypes.c_int16( q9(position.z / 5.12) ).value
		bitstrm.append(f"uintbe:8={ctypes.c_uint8(posX >> 3).value}")
		bitstrm.append(f"uintbe:8={ctypes.c_uint8(( (posX & MASK_3BIT) << 5 ) \| (posY >> 6)).value}")
		bitstrm.append(f"uintbe:8={ctypes.c_uint8(( (posY & MASK_6BIT) << 2 ) \| (posZ >> 8)).value}")
		bitstrm.append(f"uintbe:8={(posZ >> 2)}")

		#bitstrm.append(f"int:11={q10(position.x / 10.24)}")
		#bitstrm.append(f"int:11={q10(position.y / 10.24)}")
		#bitstrm.append(f"int:10={q9(position.z / 5.12)}")
		# posX = ctypes.c_int16( q10(position.x / 10.24) ).value
		# posY = ctypes.c_int16( q10(position.y / 10.24) ).value
		# posZ = ctypes.c_int16( q9(position.z / 5.12) ).value
		# bitstrm.append(f"uintbe:8={ctypes.c_uint8(posX >> 3).value}")
		# bitstrm.append(f"uintbe:8={ctypes.c_uint8(( (posX & MASK_3BIT) << 5 ) \| (posY >> 6)).value}")
		# bitstrm.append(f"uintbe:8={ctypes.c_uint8(( (posY & MASK_6BIT) << 2 ) \| (posZ >> 8)).value}")
		# bitstrm.append(f"uintbe:8={(posZ >> 2)}")

		bitstrm.append(f"int:11={q10(position.x / 10.24)}")
		bitstrm.append(f"int:11={q10(position.y / 10.24)}")
		bitstrm.append(f"int:10={q9(position.z / 5.12)}")


		def unpackOrientation(bitstrm: ConstBitStream, piSize: int) -> ORIENTATION:

tests/rtp/test_rtp.py

+3 −14

Original line number	Diff line number	Diff line
		@@ -198,15 +198,10 @@ def generatePiData(startTs: int, endTs: int) -> dict:
		y=random.randint(-32788, 32767) / 100.0,
		z=random.randint(-32788, 32767) / 100.0,
		)
		#somePositionCompact = lambda: POSITION(
		# x=random.randint(-1024, 1023) / 100.0,
		# y=random.randint(-1024, 1023) / 100.0,
		# z=random.randint(-512, 511) / 100.0,
		#)
		somePositionCompact = lambda: POSITION(
		x=1.0,
		y=1.0,
		z=-1.0,
		x=random.randint(-1024, 1023) / 100.0,
		y=random.randint(-1024, 1023) / 100.0,
		z=random.randint(-512, 511) / 100.0,
		)
		someDesc = lambda: AUDIO_DESCRIPTION(
		isSpeech=bool(random.getrandbits(1)),
		@@ -285,11 +280,6 @@ def generatePiData(startTs: int, endTs: int) -> dict:

		for ts in range(startTs, endTs, 320):
		pidata = dict()

		pidata["ISM_NUM"] = someNumISM()
		pidata["ISM_POSITION_COMPACT"] = someISMPositionsCompact(pidata["ISM_NUM"].num)
		#pidata["R_ISM_POSITION_COMPACT"] = somePositionCompact()
		'''
		pidata["SCENE_ORIENTATION"] = someOrientation()
		pidata["DEVICE_ORIENTATION_COMPENSATED"] = someOrientation()
		pidata["DEVICE_ORIENTATION_UNCOMPENSATED"] = someOrientation()
		@@ -317,7 +307,6 @@ def generatePiData(startTs: int, endTs: int) -> dict:
		pidata["R_ISM_POSITION"] = somePosition()
		pidata["R_ISM_POSITION_COMPACT"] = somePositionCompact()
		pidata["R_ISM_DIRECTION"] = someReverseISMDirection()
		'''
		data[str(ts)] = pidata
		return data