diff --git a/apps/decoder.c b/apps/decoder.c index 205691aba8883e542364682eb878fab9aef6f00b..63f68854776e937077ec9ef0522af8eeca6c3d7f 100644 --- a/apps/decoder.c +++ b/apps/decoder.c @@ -56,6 +56,9 @@ #include "debug.h" #endif #include "wmc_auto.h" +#ifdef FLP_EXCEPTION_TRAP +#include "flp_debug.h" +#endif #define WMC_TOOL_SKIP @@ -227,6 +230,9 @@ int main( reset_wmops(); reset_mem( USE_BYTES ); #endif +#ifdef FLP_EXCEPTION_TRAP + enable_float_exception_trap( FLE_MASK_DENORM | FLE_MASK_UNDERFLOW ); +#endif hHrtfBinary.hHrtfTD = NULL; /* just to avoid compilation warning */ hHrtfBinary.hHrtfStatistics = NULL; /* just to avoid compilation warning */ diff --git a/apps/encoder.c b/apps/encoder.c index 61bb8ebb3f0d24c4835755b0b69ced48636add72..4d9626edc3f972407ac59ce47074f613529597d1 100644 --- a/apps/encoder.c +++ b/apps/encoder.c @@ -15,7 +15,6 @@ the software. This notice grants no license of any kind, including but not limited to patent license, nor is any license granted by implication, estoppel or otherwise. - Contributors are required to enter into the IVAS codec Public Collaboration agreement before making Contributors are required to enter into the IVAS codec Public Collaboration agreement before making contributions. 
@@ -43,6 +42,9 @@ #include "debug.h" #endif #include "wmc_auto.h" +#ifdef FLP_EXCEPTION_TRAP +#include "flp_debug.h" +#endif #define WMC_TOOL_SKIP @@ -206,6 +208,9 @@ int main( reset_wmops(); reset_mem( USE_BYTES ); #endif +#ifdef FLP_EXCEPTION_TRAP + enable_float_exception_trap( FLE_MASK_DENORM | FLE_MASK_UNDERFLOW ); +#endif /*------------------------------------------------------------------------------------------* * Parse command-line arguments diff --git a/apps/renderer.c b/apps/renderer.c index bf2587680aa9986a8a6141c20a771e7f31e2eaf5..740680bb22cde65a19981c13c9ceb13d7d0e0457 100644 --- a/apps/renderer.c +++ b/apps/renderer.c @@ -53,6 +53,9 @@ #include "debug.h" #endif #include "wmc_auto.h" +#ifdef FLP_EXCEPTION_TRAP +#include "flp_debug.h" +#endif #define WMC_TOOL_SKIP @@ -723,6 +726,9 @@ int main( reset_wmops(); reset_mem( USE_BYTES ); #endif +#ifdef FLP_EXCEPTION_TRAP + enable_float_exception_trap( FLE_MASK_DENORM | FLE_MASK_UNDERFLOW ); +#endif for ( i = 0; i < RENDERER_MAX_MASA_INPUTS; ++i ) { diff --git a/lib_com/options.h b/lib_com/options.h old mode 100644 new mode 100755 index 45d82a98d3f82f8c68ead2bca4e1f1d29090c9ac..e69dd22ab868c10e5e0ad82629e3b505acefde80 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -55,6 +55,7 @@ /*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ /*#define WMOPS_WC_FRAME_ANALYSIS*/ /* Output detailed complexity analysis for the worst-case frame */ /*#define MEM_COUNT_DETAILS*/ /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */ +/*#define FLP_EXCEPTION_TRAP*/ /* Enable trap for floating-point exceptions (e.g., denormals, underflow, overflow, ...) 
*/ #ifdef DEBUGGING /*#define DBG_BITSTREAM_ANALYSIS*/ /* Write bitstream with annotations to a text file */ diff --git a/lib_debug/flp_debug.h b/lib_debug/flp_debug.h new file mode 100644 index 0000000000000000000000000000000000000000..d0fd894a2ef0a824eb2da87a77c2bb63e1e8163f --- /dev/null +++ b/lib_debug/flp_debug.h @@ -0,0 +1,263 @@ +/****************************************************************************************************** + + (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository. All Rights Reserved. + + This software is protected by copyright law and by international treaties. + The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository retain full ownership rights in their respective contributions in + the software. This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. 
/* It is intended exclusively for experts who have experience with such software and
   solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
   and fitness for a particular purpose are hereby disclaimed and excluded.

   Any dispute, controversy or claim arising under or in relation to providing this software shall be
   submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
   accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
   the United Nations Convention on Contracts on the International Sales of Goods.

*******************************************************************************************************/

#ifndef FLP_DEBUG_H
#define FLP_DEBUG_H

#include <stdint.h>
#include <stdio.h>

#if defined( _MSC_VER )
/* MSVC: _control87() and the _EM_* exception-mask constants */
#include <float.h>
#elif defined( __GNUC__ ) && ( defined( __SSE__ ) || defined( __SSE2__ ) || defined( __AVX__ ) )
/* GCC/Clang, x86 SSE/AVX: _mm_getcsr()/_mm_setcsr() and the _MM_MASK_* constants */
#include <xmmintrin.h>
#endif

/* Platform-independent selectors for the floating-point exceptions to trap; may be OR-ed together */
#define FLE_MASK_INVALID   0x080
#define FLE_MASK_DENORM    0x100
#define FLE_MASK_DIV_ZERO  0x200
#define FLE_MASK_OVERFLOW  0x400
#define FLE_MASK_UNDERFLOW 0x800

/*
   detect underflow exception, which results in a denormal;
   this will not detect each and every denormal - otherwise,
   all FLP values would have to be tested for denormals using
   e.g. fpclassify()/fpstatus or bitmasks
*/


/*---------------------------------------------------------------------*
 * flp_native_trap_bits()
 *
 * Private helper: translate a combination of FLE_MASK_* selectors into
 * the control-register bits of the current platform:
 *   MSVC        -> _EM_* exception-mask bits
 *   x86 SSE/AVX -> _MM_MASK_* bits of MXCSR
 *   AArch64     -> trap-enable bits of FPCR
 * Returns 0 on platforms without support.
 *---------------------------------------------------------------------*/

static inline uint64_t flp_native_trap_bits( uint32_t fle_mask )
{
    uint64_t bits = 0;

#if defined( _MSC_VER )

    if ( fle_mask & FLE_MASK_INVALID )
    {
        bits |= _EM_INVALID;
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        bits |= _EM_DENORMAL;
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        bits |= _EM_ZERODIVIDE;
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        bits |= _EM_OVERFLOW;
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        bits |= _EM_UNDERFLOW;
    }

#elif defined( __GNUC__ ) && ( defined( __SSE__ ) || defined( __SSE2__ ) || defined( __AVX__ ) )

    if ( fle_mask & FLE_MASK_INVALID )
    {
        bits |= _MM_MASK_INVALID;
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        bits |= _MM_MASK_DENORM;
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        bits |= _MM_MASK_DIV_ZERO;
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        bits |= _MM_MASK_OVERFLOW;
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        bits |= _MM_MASK_UNDERFLOW;
    }

#elif defined( __aarch64__ )

    if ( fle_mask & FLE_MASK_INVALID )
    {
        bits |= ( 1ull << 8 ); /* IOE: invalid operation */
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        bits |= ( 1ull << 15 ); /* IDE: input denormal */
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        bits |= ( 1ull << 9 ); /* DZE: division by zero */
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        bits |= ( 1ull << 10 ); /* OFE: overflow */
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        bits |= ( 1ull << 11 ); /* UFE: underflow */
    }

#else

    (void) fle_mask;

#endif

    return bits;
}


/*---------------------------------------------------------------------*
 * enable_float_exception_trap()
 *
 * Unmask (i.e. start trapping) the floating-point exceptions selected
 * in fle_mask (OR-ed FLE_MASK_* values). Exceptions that are not
 * selected keep their current setting. On unsupported platforms only a
 * message is written to stderr.
 *---------------------------------------------------------------------*/

static inline void enable_float_exception_trap( uint32_t fle_mask )
{
#if defined( _MSC_VER )

    /* _control87() is used instead of _controlfp() because _controlfp() leaves the
       DENORMAL OPERAND mask untouched, which would silently drop FLE_MASK_DENORM.
       A cleared _EM_* bit means "exception unmasked". */
    const unsigned int em_bits = (unsigned int) flp_native_trap_bits( fle_mask );

    _control87( 0, em_bits );

#elif defined( __GNUC__ ) && ( defined( __SSE__ ) || defined( __SSE2__ ) || defined( __AVX__ ) )

    /* a cleared MXCSR mask bit means "exception unmasked" */
    const unsigned int mx_bits = (unsigned int) flp_native_trap_bits( fle_mask );

    _mm_setcsr( _mm_getcsr() & ~mx_bits );

#elif defined( __aarch64__ )

    /* AArch64 (e.g., Apple Silicon) */
    /* NOTE(review): the trap-enable bits may be ignored on cores that do not implement
       trapped floating-point exceptions - confirm on the target hardware */
    uint64_t fpcr;

    __asm__ volatile( "mrs %0, fpcr"
                      : "=r"( fpcr ) );

    /* clear bits 24 (FZ) & 25 (DN) --> allow denormals to happen */
    fpcr &= ~( ( 1ull << 24 ) | ( 1ull << 25 ) );

    /* a set FPCR trap-enable bit means "exception unmasked" */
    fpcr |= flp_native_trap_bits( fle_mask );

    __asm__ volatile( "msr fpcr, %0" ::"r"( fpcr ) );

#else

    (void) fle_mask;
    fprintf( stderr, "enable_float_exception_trap() not supported on platform!\n" );

#endif
}


/*---------------------------------------------------------------------*
 * disable_float_exception_trap()
 *
 * Mask again (i.e. stop trapping) the floating-point exceptions selected
 * in fle_mask (OR-ed FLE_MASK_* values). Exceptions that are not
 * selected keep their current setting. On AArch64 the FZ/DN bits are
 * additionally set and a message is written to stderr on every call,
 * regardless of fle_mask. On unsupported platforms only a message is
 * written to stderr.
 *---------------------------------------------------------------------*/

static inline void disable_float_exception_trap( uint32_t fle_mask )
{
#if defined( _MSC_VER )

    /* see enable_float_exception_trap() for why _control87() is used;
       a set _EM_* bit means "exception masked" */
    const unsigned int em_bits = (unsigned int) flp_native_trap_bits( fle_mask );

    _control87( em_bits, em_bits );

#elif defined( __GNUC__ ) && ( defined( __SSE__ ) || defined( __SSE2__ ) || defined( __AVX__ ) )

    /* a set MXCSR mask bit means "exception masked" */
    const unsigned int mx_bits = (unsigned int) flp_native_trap_bits( fle_mask );

    _mm_setcsr( _mm_getcsr() | mx_bits );

#elif defined( __aarch64__ )

    /* AArch64 (e.g., Apple Silicon) */
    uint64_t fpcr;

    __asm__ volatile( "mrs %0, fpcr"
                      : "=r"( fpcr ) );

    /* a cleared FPCR trap-enable bit means "exception masked" */
    fpcr &= ~flp_native_trap_bits( fle_mask );

    /* set bits 24/25 (FZ/DN) again */
    /* NOTE(review): FZ/DN are forced on even if they were off before the matching enable call */
    fpcr |= ( 1ull << 24 ) | ( 1ull << 25 );
    fprintf( stderr, "float_exception_trap(): Setting bits 24/25 (FZ/DN) again\n" );

    __asm__ volatile( "msr fpcr, %0" ::"r"( fpcr ) );

#else

    (void) fle_mask;
    fprintf( stderr, "disable_float_exception_trap() not supported on platform!\n" );

#endif
}

#endif /* FLP_DEBUG_H */