diff --git a/apps/decoder.c b/apps/decoder.c index f2f0e31b3dfbdbed85b3ba823f62d44281a80b3d..da0fdd2fbd119927e32b4542413f7b171f5e1c2d 100644 --- a/apps/decoder.c +++ b/apps/decoder.c @@ -216,8 +216,8 @@ static const char *PiDataNames[IVAS_PI_MAX_ID] = { "SCENE_ORIENTATION", "DEVICE_ORIENTATION_COMPENSATED", "DEVICE_ORIENTATION_UNCOMPENSATED", "ACOUSTIC_ENVIRONMENT", "AUDIO_DESCRIPTION", "ISM_NUM", "ISM_ID", "ISM_GAIN", "ISM_ORIENTATION", "ISM_POSITION", "ISM_DISTANCE_ATTENUATION", "ISM_DIRECTIVITY", "DIEGETIC_TYPE", "RESERVED13", - "RESERVED14", "RESERVED15", "PLAYBACK_DEVICE_ORIENTATION", "HEAD_ORIENTATION", "LISTENER_POSITION", - "DYNAMIC_AUDIO_SUPPRESSION", "AUDIO_FOCUS_DIRECTION", "PI_LATENCY", "R_ISM_ID", "R_ISM_GAIN", + "AUDIO_FOCUS_INDICATION", "RESERVED15", "PLAYBACK_DEVICE_ORIENTATION", "HEAD_ORIENTATION", "LISTENER_POSITION", + "DYNAMIC_AUDIO_SUPPRESSION", "AUDIO_FOCUS_REQUEST", "PI_LATENCY", "R_ISM_ID", "R_ISM_GAIN", "R_ISM_ORIENTATION", "R_ISM_POSITION", "R_ISM_DIRECTION", "RESERVED27", "RESERVED28", "RESERVED29", "RESERVED30", "NO_DATA" }; @@ -261,7 +261,6 @@ static void IVAS_RTP_LogPiData( FILE *f_piDataOut, PIDATA_TS *piData, uint32_t n #ifdef RTP_S4_251135_CR26253_0016_REV1 case IVAS_PI_PLAYBACK_DEVICE_ORIENTATION: case IVAS_PI_HEAD_ORIENTATION: - case IVAS_PI_AUDIO_FOCUS_DIRECTION: case IVAS_PI_R_ISM_ORIENTATION: #endif { @@ -327,6 +326,26 @@ static void IVAS_RTP_LogPiData( FILE *f_piDataOut, PIDATA_TS *piData, uint32_t n fprintf( f_piDataOut, "\t\t\t]\n\t\t}" ); } break; + case IVAS_PI_AUDIO_FOCUS_INDICATION: + { + fprintf( f_piDataOut, "{" ); + if ( cur->data.focusIndication.availDirection ) + { + fprintf( f_piDataOut, "\n\t\t\t\"direction\": {\n" ); + fprintf( f_piDataOut, "\t\t\t\t\t\t\"w\": %f,\n\t\t\t\t\t\t\"x\": %f,\n\t\t\t\t\t\t\"y\": %f,\n\t\t\t\t\t\t\"z\": %f \n\t\t\t}", + cur->data.focusIndication.direction.w, cur->data.focusIndication.direction.x, cur->data.focusIndication.direction.y, cur->data.focusIndication.direction.z ); + if ( 
cur->data.focusIndication.availLevel ) + { + fprintf( f_piDataOut, "," ); + } + } + if ( cur->data.focusIndication.availLevel ) + { + fprintf( f_piDataOut, "\n\t\t\t\"level\": %d", cur->data.focusIndication.flvl ); + } + fprintf( f_piDataOut, "\n\t\t}" ); + } + break; case IVAS_PI_DYNAMIC_AUDIO_SUPPRESSION: { IVAS_PIDATA_DYNAMIC_SUPPRESSION *das = &cur->data.dynSuppression; @@ -339,7 +358,26 @@ static void IVAS_RTP_LogPiData( FILE *f_piDataOut, PIDATA_TS *piData, uint32_t n } break; + case IVAS_PI_AUDIO_FOCUS_REQUEST: /* kept ahead of the RESERVED labels so that RESERVED13 does not fall through into this handler */ + { + fprintf( f_piDataOut, "{" ); + if ( cur->data.focusRequest.availDirection ) + { + fprintf( f_piDataOut, "\n\t\t\t\"direction\": {\n" ); + fprintf( f_piDataOut, "\t\t\t\t\t\t\"w\": %f,\n\t\t\t\t\t\t\"x\": %f,\n\t\t\t\t\t\t\"y\": %f,\n\t\t\t\t\t\t\"z\": %f \n\t\t\t}", + cur->data.focusRequest.direction.w, cur->data.focusRequest.direction.x, cur->data.focusRequest.direction.y, cur->data.focusRequest.direction.z ); + if ( cur->data.focusRequest.availLevel ) + { + fprintf( f_piDataOut, "," ); + } + } + if ( cur->data.focusRequest.availLevel ) + { + fprintf( f_piDataOut, "\n\t\t\t\"level\": %d", cur->data.focusRequest.flvl ); + } + fprintf( f_piDataOut, "\n\t\t}" ); + } + break; case IVAS_PI_RESERVED13: - case IVAS_PI_RESERVED14: case IVAS_PI_RESERVED15: case IVAS_PI_RESERVED27: case IVAS_PI_RESERVED28: @@ -534,6 +572,15 @@ static ivas_error IVAS_RTP_ApplyPiData( IVAS_RTP *rtp, IVAS_DEC_HANDLE hIvasDec, } break; +#ifdef RTP_S4_251135_CR26253_0016_REV1 + case IVAS_PI_DIEGETIC_TYPE: + { + DEBUG_PRINT( stdout, "PI_DIEGETIC_TYPE : %d, %d, %d, %d, %d\n", piData->data.digeticIndicator.isDiegetic[0], piData->data.digeticIndicator.isDiegetic[1], piData->data.digeticIndicator.isDiegetic[2], piData->data.digeticIndicator.isDiegetic[3], piData->data.digeticIndicator.isDiegetic[4] ); + IVAS_DEC_setDiegeticInputPI( hIvasDec, piData->data.digeticIndicator.isDiegetic ); + } + break; + +#endif default: { fprintf( stderr, "Unhandled PI data of type : %s\n", 
PiDataNames[piDataType] ); diff --git a/lib_com/options.h b/lib_com/options.h index 58ca69d57098444617041c3ba987e8c7ae98bca3..b1f59650277f54972c61d6c1838d8f265c041402 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -158,7 +158,7 @@ /* ################## Start DEVELOPMENT switches ######################### */ -//#define RTP_S4_251135_CR26253_0016_REV1 /* RTP Pack/Unpack API corresponding to CR 26253 */ +#define RTP_S4_251135_CR26253_0016_REV1 /* RTP Pack/Unpack API corresponding to CR 26253 */ #define IVAS_RTPDUMP /* RTPDUMP writing and reading for IVAS payloads */ /* ################### Start BE switches ################################# */ diff --git a/lib_dec/lib_dec.c b/lib_dec/lib_dec.c index 4d27cb168ed20aeda3d0108269b961615f0d59f5..c3b7e69a829728e6207f44274e3312401c359d6a 100644 --- a/lib_dec/lib_dec.c +++ b/lib_dec/lib_dec.c @@ -3827,6 +3827,31 @@ void IVAS_DEC_resetExternalOrientations( } +#endif +#ifdef RTP_S4_251135_CR26253_0016_REV1 +/*---------------------------------------------------------------------* + * IVAS_DEC_setDiegeticInputPI( ) + * + * Set isDiegeticInputPI flags for combined orientation handle based on PI data. 
+ *---------------------------------------------------------------------*/ + +void IVAS_DEC_setDiegeticInputPI( + IVAS_DEC_HANDLE hIvasDec, /* i/o: IVAS decoder handle */ + const bool *diegeticPIValues /* i : diegetic values for the input stream */ +) +{ + if ( hIvasDec != NULL && hIvasDec->st_ivas != NULL && diegeticPIValues != NULL && hIvasDec->st_ivas->hCombinedOrientationData != NULL ) /* silently ignore missing handles/input instead of dereferencing NULL */ + { + int8_t i; + for ( i = 0; i < (1 + IVAS_MAX_NUM_OBJECTS); i++ ) + { + hIvasDec->st_ivas->hCombinedOrientationData->isDiegeticInputPI[i] = diegeticPIValues[i]; + } + hIvasDec->st_ivas->hCombinedOrientationData->isDiegeticInputPISet = true; /* marks the stored flags as valid for the orientation combiner */ + } +} + + #endif /*---------------------------------------------------------------------* * IVAS_DEC_VoIP_IsEmpty( ) diff --git a/lib_dec/lib_dec.h b/lib_dec/lib_dec.h index 2e98d9e0f1e5547ba8f73c6b5db3d96ea43f30a6..caed7bd54b0f29874f0557ab0703efd538cbf425 100644 --- a/lib_dec/lib_dec.h +++ b/lib_dec/lib_dec.h @@ -349,6 +349,13 @@ void IVAS_DEC_resetExternalOrientations( IVAS_DEC_HANDLE hIvasDec /* i/o: IVAS decoder handle */ ); +#endif +#ifdef RTP_S4_251135_CR26253_0016_REV1 +void IVAS_DEC_setDiegeticInputPI( + IVAS_DEC_HANDLE hIvasDec, /* i/o: IVAS decoder handle */ + const bool *diegeticPIValues /* i : diegetic values for the input stream */ +); + #endif /* Setter functions - apply changes to decoder configuration */ diff --git a/lib_rend/ivas_rotation.c b/lib_rend/ivas_rotation.c index 9a1d74581da922928edefa6bb34ff03f69a6dea7..8b62d7a111744a2a5bd7fa48d29e7ad92534fa8f 100644 --- a/lib_rend/ivas_rotation.c +++ b/lib_rend/ivas_rotation.c @@ -862,6 +862,14 @@ ivas_error ivas_combined_orientation_open( ( *hCombinedOrientationData )->subframe_idx = 0; ( *hCombinedOrientationData )->subframe_size = (int16_t) ( fs / ( FRAMES_PER_SEC * MAX_PARAM_SPATIAL_SUBFRAMES ) ); ( *hCombinedOrientationData )->cur_subframe_samples_rendered = 0; +#ifdef RTP_S4_251135_CR26253_0016_REV1 + + for ( i = 0; i < (1 + IVAS_MAX_NUM_OBJECTS); i++ ) + { + ( *hCombinedOrientationData )->isDiegeticInputPI[i] = true; + } + ( 
*hCombinedOrientationData )->isDiegeticInputPISet = false; +#endif return IVAS_ERR_OK; } @@ -1022,11 +1030,30 @@ ivas_error combine_external_and_head_orientations( } else if ( hExtOrientationData == NULL && headRotQuaternions != NULL ) { +#ifdef RTP_S4_251135_CR26253_0016_REV1 + /* Disable head rotation if diegetic PI data indicating non-diegetic audio is received */ + if ( hCombinedOrientationData->isDiegeticInputPISet && !hCombinedOrientationData->isDiegeticInputPI[0] && !hCombinedOrientationData->isDiegeticInputPI[1] && !hCombinedOrientationData->isDiegeticInputPI[2] && !hCombinedOrientationData->isDiegeticInputPI[3] && !hCombinedOrientationData->isDiegeticInputPI[4] ) + { + for ( i = 0; i < hCombinedOrientationData->num_subframes; i++ ) + { + hCombinedOrientationData->Quaternions[i] = identity; + } + } + else + { + /* Head rotation only */ + for ( i = 0; i < hCombinedOrientationData->num_subframes; i++ ) + { + hCombinedOrientationData->Quaternions[i] = headRotQuaternions[i]; + } + } +#else /* Head rotation only */ for ( i = 0; i < hCombinedOrientationData->num_subframes; i++ ) { hCombinedOrientationData->Quaternions[i] = headRotQuaternions[i]; } +#endif } if ( hExtOrientationData != NULL ) @@ -1103,6 +1130,40 @@ ivas_error combine_external_and_head_orientations( hCombinedOrientationData->Quaternion_frozen_head = identity; hCombinedOrientationData->isHeadRotationFrozen = 0; } +#ifdef RTP_S4_251135_CR26253_0016_REV1 + /* Disable head rotation if diegetic PI data indicating non-diegetic audio is received */ + if ( hCombinedOrientationData->isDiegeticInputPISet && !hCombinedOrientationData->isDiegeticInputPI[0] && !hCombinedOrientationData->isDiegeticInputPI[1] && !hCombinedOrientationData->isDiegeticInputPI[2] && !hCombinedOrientationData->isDiegeticInputPI[3] && !hCombinedOrientationData->isDiegeticInputPI[4] ) + { + continue; + } + else + { + /* Use the most recent head rotation */ + if ( hExtOrientationData->enableHeadRotation[i] == 1 ) + { + if ( 
hExtOrientationData->enableExternalOrientation[i] > 0 ) + { + QuaternionProduct( hCombinedOrientationData->Quaternions[i], headRotQuaternions[i], &hCombinedOrientationData->Quaternions[i] ); + } + else + { + hCombinedOrientationData->Quaternions[i] = headRotQuaternions[i]; + } + } + /* Use the freezed head rotation */ + else if ( hExtOrientationData->enableHeadRotation[i] == 2 ) + { + if ( hExtOrientationData->enableExternalOrientation[i] > 0 ) + { + QuaternionProduct( hCombinedOrientationData->Quaternions[i], hCombinedOrientationData->Quaternion_frozen_head, &hCombinedOrientationData->Quaternions[i] ); + } + else + { + hCombinedOrientationData->Quaternions[i] = hCombinedOrientationData->Quaternion_frozen_head; + } + } + } +#else /* Use the most recent head rotation */ if ( hExtOrientationData->enableHeadRotation[i] == 1 ) { @@ -1127,6 +1188,7 @@ ivas_error combine_external_and_head_orientations( hCombinedOrientationData->Quaternions[i] = hCombinedOrientationData->Quaternion_frozen_head; } } +#endif /* Reset the combined orientations to identity */ if ( hExtOrientationData->enableHeadRotation[i] == 0 && hExtOrientationData->enableExternalOrientation[i] == 0 ) diff --git a/lib_rend/ivas_stat_rend.h b/lib_rend/ivas_stat_rend.h index 69fcb30fb803f590f19fb50517892a98ecc97f9c..3e6270e22985fbea579e6369a3027e469f5e8a3b 100644 --- a/lib_rend/ivas_stat_rend.h +++ b/lib_rend/ivas_stat_rend.h @@ -710,6 +710,10 @@ typedef struct ivas_combined_orientation_struct int16_t cur_subframe_samples_rendered; int16_t subframe_idx_start; int16_t cur_subframe_samples_rendered_start; +#ifdef RTP_S4_251135_CR26253_0016_REV1 + bool isDiegeticInputPI[1 + IVAS_MAX_NUM_OBJECTS]; + bool isDiegeticInputPISet; +#endif } COMBINED_ORIENTATION_DATA, *COMBINED_ORIENTATION_HANDLE; /*----------------------------------------------------------------------------------* diff --git a/lib_util/ivas_rtp_pi_data.c b/lib_util/ivas_rtp_pi_data.c index 
e7dcde89983700384e447221e9431340bb2cac71..c939c4539c4f7f643df590bd482865aff4c4c398 100644 --- a/lib_util/ivas_rtp_pi_data.c +++ b/lib_util/ivas_rtp_pi_data.c @@ -132,7 +132,7 @@ static ivas_error packOrientation( const IVAS_PIDATA_GENERIC *piData, uint8_t *b if ( ( piData->piDataType != IVAS_PI_SCENE_ORIENTATION ) && ( piData->piDataType != IVAS_PI_DEVICE_ORIENTATION_COMPENSATED ) && ( piData->piDataType != IVAS_PI_DEVICE_ORIENTATION_UNCOMPENSATED ) #ifdef RTP_S4_251135_CR26253_0016_REV1 - && ( piData->piDataType != IVAS_PI_PLAYBACK_DEVICE_ORIENTATION ) && ( piData->piDataType != IVAS_PI_HEAD_ORIENTATION ) && ( piData->piDataType != IVAS_PI_AUDIO_FOCUS_DIRECTION ) + && ( piData->piDataType != IVAS_PI_PLAYBACK_DEVICE_ORIENTATION ) && ( piData->piDataType != IVAS_PI_HEAD_ORIENTATION ) #endif /* RTP_S4_251135_CR26253_0016_REV1 */ ) { @@ -544,6 +544,95 @@ static ivas_error unpackDiegetic( const uint8_t *buffer, uint32_t numDataBytes, return IVAS_ERR_OK; } +static ivas_error packAudioFocusCommon( const IVAS_PIDATA_GENERIC *piData, uint8_t *buffer, uint32_t maxDataBytes, uint32_t *nBytesWritten ) +{ + uint32_t nBytes = 0; + uint8_t packedSize = 1; + const IVAS_PIDATA_AUDIO_FOCUS *audioFocus = (const IVAS_PIDATA_AUDIO_FOCUS *) piData; + + *nBytesWritten = 0; + + if ( piData->size != sizeof( IVAS_PIDATA_AUDIO_FOCUS ) ) + { + return IVAS_ERROR( IVAS_ERR_WRONG_PARAMS, "Incorrect size in PI data of type Audio Focus" ); + } + + if ( audioFocus->availDirection && audioFocus->availLevel ) + { + packedSize = 9; + } + else if ( audioFocus->availDirection ) + { + packedSize = 8; + } + else if ( audioFocus->availLevel ) + { + packedSize = 1; + } + else + { + return IVAS_ERROR( IVAS_ERR_WRONG_PARAMS, "Neither direction or level is available for packing Audio Focus" ); + } + + /* Audio Focus data is packedSize bytes, header is 2 bytes */ + if ( maxDataBytes < (uint32_t) packedSize + 2 ) + { + return IVAS_ERROR( IVAS_ERR_WRONG_PARAMS, "Insufficient space to pack Audio Focus PI data" ); 
+ } + + buffer[nBytes++] = ( audioFocus->piDataType & MASK_5BIT ); /* PF/PM populated during final packing */ + buffer[nBytes++] = packedSize; + + if ( packedSize == 9 || packedSize == 8 ) + { + nBytes = writeInt16( buffer, nBytes, ivasPayload_convertToQ15( audioFocus->direction.w ) ); + nBytes = writeInt16( buffer, nBytes, ivasPayload_convertToQ15( audioFocus->direction.x ) ); + nBytes = writeInt16( buffer, nBytes, ivasPayload_convertToQ15( audioFocus->direction.y ) ); + nBytes = writeInt16( buffer, nBytes, ivasPayload_convertToQ15( audioFocus->direction.z ) ); + } + if ( packedSize == 9 || packedSize == 1) + { + buffer[nBytes++] = ( (uint8_t) audioFocus->flvl & MASK_4BIT ) << 4; + } + + *nBytesWritten = nBytes; + return IVAS_ERR_OK; +} + +static ivas_error unpackAudioFocusCommon( const uint8_t *buffer, uint32_t numDataBytes, IVAS_PIDATA_GENERIC *piData ) +{ + IVAS_PIDATA_AUDIO_FOCUS *audioFocus = (IVAS_PIDATA_AUDIO_FOCUS *) piData; + + /* Audio Focus data is either 1, 8 or 9 bytes */ + if ( numDataBytes != 1 && numDataBytes != 8 && numDataBytes != 9 ) + { + return IVAS_ERROR( IVAS_ERR_RTP_UNPACK_PI_DATA, "Incorrect size to unpack PI data of type Audio Focus" ); + } + + piData->size = sizeof( IVAS_PIDATA_AUDIO_FOCUS ); + audioFocus->availDirection = ( numDataBytes >= 8 ); + audioFocus->availLevel = ( numDataBytes == 1 || numDataBytes == 9 ); + + if ( numDataBytes == 1 ) + { + audioFocus->flvl = ( buffer[0] >> 4 ); + } + else + { + audioFocus->direction.w = FLOAT_FROM_Q15( readInt16( &buffer[0] ) ); + audioFocus->direction.x = FLOAT_FROM_Q15( readInt16( &buffer[2] ) ); + audioFocus->direction.y = FLOAT_FROM_Q15( readInt16( &buffer[4] ) ); + audioFocus->direction.z = FLOAT_FROM_Q15( readInt16( &buffer[6] ) ); + + if ( numDataBytes == 9 ) + { + audioFocus->flvl = ( buffer[8] >> 4 ); + } + } + + return IVAS_ERR_OK; +} + #endif /* RTP_S4_251135_CR26253_0016_REV1 */ @@ -570,14 +659,18 @@ static const PACK_PI_FN packPiDataFuntions[IVAS_PI_MAX_ID] = { packUnsupportedData, 
/* DIEGETIC_TYPE */ #endif packUnsupportedData, /* RESERVED13 */ - packUnsupportedData, /* RESERVED14 */ +#ifdef RTP_S4_251135_CR26253_0016_REV1 + packAudioFocusCommon,/* AUDIO_FOCUS_INDICATION */ +#else + packUnsupportedData, /* AUDIO_FOCUS_INDICATION */ +#endif packUnsupportedData, /* RESERVED15 */ #ifdef RTP_S4_251135_CR26253_0016_REV1 packOrientation, /* PLAYBACK_DEVICE_ORIENTATION */ packOrientation, /* HEAD_ORIENTATION */ packListenerPosition, /* LISTENER_POSITION */ packDynamicSuppression, /* DYNAMIC_AUDIO_SUPPRESSION */ - packOrientation, /* AUDIO_FOCUS_DIRECTION */ + packAudioFocusCommon, /* AUDIO_FOCUS_REQUEST */ #else packUnsupportedData, /* PLAYBACK_DEVICE_ORIENTATION */ packUnsupportedData, /* HEAD_ORIENTATION */ @@ -625,14 +718,18 @@ static const UNPACK_PI_FN unpackPiDataFuntions[IVAS_PI_MAX_ID] = { unpackUnsupportedData, /* DIEGETIC_TYPE */ #endif unpackUnsupportedData, /* RESERVED13 */ - unpackUnsupportedData, /* RESERVED14 */ +#ifdef RTP_S4_251135_CR26253_0016_REV1 + unpackAudioFocusCommon,/* AUDIO_FOCUS_INDICATION */ +#else + unpackUnsupportedData, /* AUDIO_FOCUS_INDICATION */ +#endif unpackUnsupportedData, /* RESERVED15 */ #ifdef RTP_S4_251135_CR26253_0016_REV1 unpackOrientation, /* PLAYBACK_DEVICE_ORIENTATION */ unpackOrientation, /* HEAD_ORIENTATION */ unpackListenerPosition, /* LISTENER_POSITION */ unpackDynamicSuppression, /* DYNAMIC_AUDIO_SUPPRESSION */ - unpackOrientation, /* AUDIO_FOCUS_DIRECTION */ + unpackAudioFocusCommon, /* AUDIO_FOCUS_REQUEST */ #else unpackUnsupportedData, /* PLAYBACK_DEVICE_ORIENTATION */ unpackUnsupportedData, /* HEAD_ORIENTATION */ @@ -672,13 +769,13 @@ static const uint32_t maxPiDataSize[IVAS_PI_MAX_ID] = { 8, /* IVAS_PI_ISM_DIRECTIVITY */ 1, /* IVAS_PI_DIEGETIC_TYPE */ 0, /* IVAS_PI_RESERVED13 */ - 0, /* IVAS_PI_RESERVED14 */ + 9, /* IVAS_PI_AUDIO_FOCUS_INDICATION */ 0, /* IVAS_PI_RESERVED15 */ 8, /* IVAS_PI_PLAYBACK_DEVICE_ORIENTATION */ 8, /* IVAS_PI_HEAD_ORIENTATION */ 6, /* IVAS_PI_LISTENER_POSITION */ 2, /* 
IVAS_PI_DYNAMIC_AUDIO_SUPPRESSION */ - 8, /* IVAS_PI_AUDIO_FOCUS_DIRECTION */ + 9, /* IVAS_PI_AUDIO_FOCUS_REQUEST */ 4, /* IVAS_PI_PI_LATENCY */ 1, /* IVAS_PI_R_ISM_ID */ 1, /* IVAS_PI_R_ISM_GAIN */ diff --git a/lib_util/ivas_rtp_pi_data.h b/lib_util/ivas_rtp_pi_data.h index 3be265bb87a46b2f9dad7a5d6020323b59f111cc..aad50dd6b653ee582793e25c5967a8eab996c723 100644 --- a/lib_util/ivas_rtp_pi_data.h +++ b/lib_util/ivas_rtp_pi_data.h @@ -64,7 +64,7 @@ extern "C" IVAS_PI_ISM_DIRECTIVITY, /* directivity of each object */ IVAS_PI_DIEGETIC_TYPE, /* digetic audio indication */ IVAS_PI_RESERVED13, /* reserved */ - IVAS_PI_RESERVED14, /* reserved */ + IVAS_PI_AUDIO_FOCUS_INDICATION, /* audio focus indication (direction in Quaternions and/or level) */ IVAS_PI_RESERVED15, /* reserved */ /* Reverse direction PI types */ @@ -72,7 +72,7 @@ extern "C" IVAS_PI_HEAD_ORIENTATION, /* head orientation of the listener in Quaternions */ IVAS_PI_LISTENER_POSITION, /* position of the listener in 3D space */ IVAS_PI_DYNAMIC_AUDIO_SUPPRESSION, /* receiver’s preference with respect to audio suppression */ - IVAS_PI_AUDIO_FOCUS_DIRECTION, /* direction of interest for the listener in Quaternions */ + IVAS_PI_AUDIO_FOCUS_REQUEST, /* direction of interest for the listener in Quaternions and/or audio focus level */ IVAS_PI_PI_LATENCY, /* round-trip latency for PI frames */ IVAS_PI_R_ISM_ID, /* id of an object for editing */ IVAS_PI_R_ISM_GAIN, /* editing request for gain factor for received object */ @@ -100,7 +100,6 @@ extern "C" * - IVAS_PI_DEVICE_ORIENTATION_UNCOMPENSATED * - IVAS_PI_PLAYBACK_DEVICE_ORIENTATION * - IVAS_PI_HEAD_ORIENTATION - * - IVAS_PI_AUDIO_FOCUS_DIRECTION * - IVAS_PI_R_ISM_ORIENTATION * * piDataType is used to identify the correct pi data type contained here @@ -293,6 +292,40 @@ extern "C" bool isDiegetic[1 + IVAS_PI_MAX_OBJECTS]; /* diegetic indication as per audio format */ } IVAS_PIDATA_DIEGETIC; + /* Audio focus direction indicates a direction of interest. 
+ * The audio focus level indicates the amount of suppression applied to the + * directions other than the audio focus direction. + */ + typedef enum + { + IVAS_FLVL_NO_AUDIO_FOCUS = 0, /* Apply no audio focus */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_1, /* Audio focus level 1 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_2, /* Audio focus level 2 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_3, /* Audio focus level 3 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_4, /* Audio focus level 4 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_5, /* Audio focus level 5 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_6, /* Audio focus level 6 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_7, /* Audio focus level 7 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_8, /* Audio focus level 8 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_9, /* Audio focus level 9 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_10, /* Audio focus level 10 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_11, /* Audio focus level 11 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_12, /* Audio focus level 12 */ + IVAS_FLVL_FOCUS_LEVEL_LEVEL_13, /* Audio focus level 13 */ + IVAS_FLVL_DEFAULT_AUDIO_FOCUS, /* Default audio focus */ + IVAS_FLVL_MAX_AUDIO_FOCUS, /* Apply max audio focus */ + } IVAS_FLVL; + + typedef struct + { + size_t size; /* sizeof(IVAS_PIDATA_AUDIO_FOCUS) */ + uint32_t piDataType; /* IVAS_PI_AUDIO_FOCUS_INDICATION or IVAS_PI_AUDIO_FOCUS_REQUEST */ + bool availDirection; /* audio focus contains direction */ + bool availLevel; /* audio focus contains level */ + IVAS_QUATERNION direction; /* direction data expressed as quaternions, meaningful only when availDirection is set */ + IVAS_FLVL flvl; /* audio focus level, meaningful only when availLevel is set */ + } IVAS_PIDATA_AUDIO_FOCUS; + /* Listener position */ typedef struct { @@ -416,12 +449,13 @@ extern "C" IVAS_PIDATA_ISM_ATTENUATION ismAttenuation; IVAS_PIDATA_ISM_DIRECTIVITY ismDirectivity; IVAS_PIDATA_DIEGETIC digeticIndicator; + IVAS_PIDATA_AUDIO_FOCUS focusIndication; IVAS_PIDATA_ORIENTATION playbackOrientation; IVAS_PIDATA_ORIENTATION headOrientation; IVAS_PIDATA_LISTENER_POSITION listnerPosition; IVAS_PIDATA_DYNAMIC_SUPPRESSION dynSuppression; - IVAS_PIDATA_ORIENTATION 
focusDirection; + IVAS_PIDATA_AUDIO_FOCUS focusRequest; IVAS_PIDATA_REVERSE_PI_LATENCY piLatency; IVAS_PIDATA_ISM_EDIT_ID ismEditId; IVAS_PIDATA_ISM_EDIT_GAIN ismEditGain; diff --git a/tests/rtp/ivasrtp.py b/tests/rtp/ivasrtp.py index f6923c007753f1cc3eee9ee84064fd4d3e7aa1b6..1eec45fdce0a92ec64d9db10ef3a535c63a666b0 100644 --- a/tests/rtp/ivasrtp.py +++ b/tests/rtp/ivasrtp.py @@ -42,7 +42,7 @@ import json import base64 import argparse from pathlib import Path -from typing import cast +from typing import cast, Optional NO_REQ="NO_REQ" @@ -170,13 +170,13 @@ class PIDATAS(str, Enum): ISM_DIRECTIVITY = "ISM_DIRECTIVITY" DIEGETIC_TYPE = "DIEGETIC_TYPE" RESERVED13 = "RESERVED13" - RESERVED14 = "RESERVED14" + AUDIO_FOCUS_INDICATION = "AUDIO_FOCUS_INDICATION" RESERVED15 = "RESERVED15" PLAYBACK_DEVICE_ORIENTATION = "PLAYBACK_DEVICE_ORIENTATION" HEAD_ORIENTATION = "HEAD_ORIENTATION" LISTENER_POSITION = "LISTENER_POSITION" DYNAMIC_AUDIO_SUPPRESSION = "DYNAMIC_AUDIO_SUPPRESSION" - AUDIO_FOCUS_DIRECTION = "AUDIO_FOCUS_DIRECTION" + AUDIO_FOCUS_REQUEST = "AUDIO_FOCUS_REQUEST" PI_LATENCY = "PI_LATENCY" R_ISM_ID = "R_ISM_ID" R_ISM_GAIN = "R_ISM_GAIN" @@ -207,6 +207,23 @@ class SUPPRESSION_LEVEL(int, Enum): SUPPRESSION_LEVEL_14 = 14 SUPPRESSION_LEVEL_MAX = 15 +class AUDIO_FOCUS_LEVEL(int, Enum): + AUDIO_FOCUS_LEVEL_NONE = 0 + AUDIO_FOCUS_LEVEL_1 = 1 + AUDIO_FOCUS_LEVEL_2 = 2 + AUDIO_FOCUS_LEVEL_3 = 3 + AUDIO_FOCUS_LEVEL_4 = 4 + AUDIO_FOCUS_LEVEL_5 = 5 + AUDIO_FOCUS_LEVEL_6 = 6 + AUDIO_FOCUS_LEVEL_7 = 7 + AUDIO_FOCUS_LEVEL_8 = 8 + AUDIO_FOCUS_LEVEL_9 = 9 + AUDIO_FOCUS_LEVEL_10 = 10 + AUDIO_FOCUS_LEVEL_11 = 11 + AUDIO_FOCUS_LEVEL_12 = 12 + AUDIO_FOCUS_LEVEL_13 = 13 + AUDIO_FOCUS_LEVEL_DEFAULT = 14 + AUDIO_FOCUS_LEVEL_NO_PREFERENCE = 15 @dataclass class RTPHDR: @@ -343,6 +360,11 @@ class ACOUSTIC_ENVIRONMENT: dim: tuple[float, float, float] = () abscoeff: tuple[float, float, float, float, float, float] = () +@dataclass +class AUDIO_FOCUS: + direction: Optional[ORIENTATION] = None + 
level: Optional[AUDIO_FOCUS_LEVEL] = None + @dataclass class PIDATA: timestamp: int = 0 @@ -573,6 +595,30 @@ def packAcousticEnv(bitstrm: BitStream, data: any): absCoeff = mapNearestIndex(absorptionCoeffValues, aenv.abscoeff[n]) bitstrm.append(f'uint:2={absCoeff}') +def unpackAudioFocus(bitstrm: ConstBitStream, piSize: int) -> AUDIO_FOCUS: + assert piSize == 1 or piSize == 8 or piSize == 9, "Incorrect PI Data Size for AUDIO_FOCUS" + direction = None + level = None + if piSize == 1: + level = bitstrm.read(4).uint + _ = bitstrm.read(4) + else: + direction = unpackOrientation(bitstrm, 8) + if piSize == 9: + level = bitstrm.read(4).uint + _ = bitstrm.read(4) + + return AUDIO_FOCUS(direction=direction, level=level) + +def packAudioFocus(bitstrm: BitStream, data: any): + assert type(data) == AUDIO_FOCUS, "Audio focus PI Data expects a data of type AUDIO_FOCUS" + auFocus = cast(AUDIO_FOCUS, data) + if auFocus.direction is not None: + packOrientations(bitstrm, [auFocus.direction]) + if auFocus.level is not None: + bitstrm.append(f'uint:4={auFocus.level}') + bitstrm.append(f'uint:4=0') + PIDataUnpacker = [ unpackOrientation, # SCENE_ORIENTATION, @@ -589,13 +635,13 @@ PIDataUnpacker = [ unpackUnsupported, # ISM_DIRECTIVITY unpackDiegetic, # DIEGETIC_TYPE unpackUnsupported, # RESERVED13 - unpackUnsupported, # RESERVED14 + unpackAudioFocus, # AUDIO_FOCUS_INDICATION unpackUnsupported, # RESERVED15 unpackOrientation, # PLAYBACK_DEVICE_ORIENTATION unpackOrientation, # HEAD_ORIENTATION unpackPosition, # LISTENER_POSITION unpackDAS, # DYNAMIC_AUDIO_SUPPRESSION - unpackOrientation, # AUDIO_FOCUS_DIRECTION + unpackAudioFocus, # AUDIO_FOCUS_REQUEST unpackUnsupported, # PI_LATENCY unpackUnsupported, # R_ISM_ID unpackUnsupported, # R_ISM_GAIN @@ -624,13 +670,13 @@ PIDataPacker = [ packUnsupported, # ISM_DIRECTIVITY packDiegetic, # DIEGETIC_TYPE packUnsupported, # RESERVED13 - packUnsupported, # RESERVED14 + packAudioFocus, # AUDIO_FOCUS_INDICATION packUnsupported, # RESERVED15 
packOrientation, # PLAYBACK_DEVICE_ORIENTATION packOrientation, # HEAD_ORIENTATION packPosition, # LISTENER_POSITION packDAS, # DYNAMIC_AUDIO_SUPPRESSION - packOrientation, # AUDIO_FOCUS_DIRECTION + packAudioFocus, # AUDIO_FOCUS_REQUEST packUnsupported, # PI_LATENCY packUnsupported, # R_ISM_ID packUnsupported, # R_ISM_GAIN diff --git a/tests/rtp/test_rtp.py b/tests/rtp/test_rtp.py index 342e29c8b64f3afde799ba22eab8230afeb670fe..4e394ee4787edd2adc4d1470304145777cd9c688 100644 --- a/tests/rtp/test_rtp.py +++ b/tests/rtp/test_rtp.py @@ -164,6 +164,10 @@ def generatePiData(startTs: int, endTs: int) -> dict: someDesc = lambda : AUDIO_DESCRIPTION(isSpeech=bool(random.getrandbits(1)), isMusic=bool(random.getrandbits(1)), isAmbiance=bool(random.getrandbits(1)), isEditable=bool(random.getrandbits(1)), isBinaural=bool(random.getrandbits(1))) someDAS = lambda : DYNAMIC_AUDIO_SUPPRESSION(preferSpeech=bool(random.getrandbits(1)), preferMusic=bool(random.getrandbits(1)), preferAmbiance=bool(random.getrandbits(1)), level=random.randint(0, 15)) someDIG = lambda : DIEGETIC_TYPE(isDigetic=[ bool(random.getrandbits(1)) for _ in range(random.randint(1, 5)) ]) + someAuFocusDirLvl = lambda : AUDIO_FOCUS(ORIENTATION(w=2*random.random()-1.0, x=2*random.random()-1.0, y=2*random.random()-1.0, z=2*random.random()-1.0), level=AUDIO_FOCUS_LEVEL(random.randint(0, 15))) + someAuFocusDir = lambda : AUDIO_FOCUS(ORIENTATION(w=2*random.random()-1.0, x=2*random.random()-1.0, y=2*random.random()-1.0, z=2*random.random()-1.0)) + someAuFocusLvl = lambda : AUDIO_FOCUS(level=AUDIO_FOCUS_LEVEL(random.randint(0, 15))) + someAuFocusList = [someAuFocusDirLvl, someAuFocusDir, someAuFocusLvl] for ts in range(startTs, endTs, 320): pidata = dict() @@ -172,7 +176,7 @@ def generatePiData(startTs: int, endTs: int) -> dict: pidata["DEVICE_ORIENTATION_UNCOMPENSATED"] = someOrientation() pidata["PLAYBACK_DEVICE_ORIENTATION"] = someOrientation() pidata["HEAD_ORIENTATION"] = someOrientation() - 
pidata["AUDIO_FOCUS_DIRECTION"] = someOrientation() + pidata["AUDIO_FOCUS_REQUEST"] = random.choice(someAuFocusList)() pidata["LISTENER_POSITION"] = somePosition() pidata["DYNAMIC_AUDIO_SUPPRESSION"] = someDAS() pidata["AUDIO_DESCRIPTION"] = [someDesc() for n in range(random.randint(1, 5))] @@ -224,6 +228,17 @@ def isEqualAcousticEnv(ref: ACOUSTIC_ENVIRONMENT, dut: ACOUSTIC_ENVIRONMENT): for r, d in zip(ref.rt60, dut.rt60): assert r == d, f"Acoustic Env PI Data mismatch in rt60 {r} != {d}" +def isEqualAudioFocus(ref: AUDIO_FOCUS, dut: AUDIO_FOCUS): + if ref.direction is not None or dut.direction is not None: + assert ref.direction is not None, "Audio Focus PI Data missing direction" + assert dut.direction is not None, "Audio Focus PI Data missing direction" + if ref.direction is not None and dut.direction is not None: + assert abs(ref.direction['w'] - dut.direction.w) < 0.0001, "Audio Focus PI Data mismatch in direction w" + assert abs(ref.direction['x'] - dut.direction.x) < 0.0001, "Audio Focus PI Data mismatch in direction x" + assert abs(ref.direction['y'] - dut.direction.y) < 0.0001, "Audio Focus PI Data mismatch in direction y" + assert abs(ref.direction['z'] - dut.direction.z) < 0.0001, "Audio Focus PI Data mismatch in direction z" + assert ref.level == dut.level, "Audio Focus PI Data mismatch in level" + class CSVREADER: def __init__(self, csvFile: Path): self.rIdx = 0 @@ -447,6 +462,8 @@ def run_rtp_bitstream_tests ( isEqualDiegetic(DIEGETIC_TYPE(**decoded), data) elif type(generatedPIData[ts][pitype]) == ACOUSTIC_ENVIRONMENT: isEqualAcousticEnv(ACOUSTIC_ENVIRONMENT(**decoded), data) + elif type(generatedPIData[ts][pitype]) == AUDIO_FOCUS: + isEqualAudioFocus(AUDIO_FOCUS(**decoded), data) elif type(generatedPIData[ts][pitype]) == list: for r, d in zip(generatedPIData[ts][pitype], decodedPiData[ts][pitype]): isEqualAD(AUDIO_DESCRIPTION(**d), r)