Commit fa69c637 authored by multrus's avatar multrus
Browse files

revised version of trap for floating-point exceptions

parent ddb7b3e9
Loading
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -56,7 +56,7 @@
#include "debug.h"
#endif
#include "wmc_auto.h"
#ifdef DENORMAL_TRAP
#ifdef FLP_EXCEPTION_TRAP
#include "flp_debug.h"
#endif

@@ -232,8 +232,8 @@ int main(
    reset_wmops();
    reset_mem( USE_BYTES );
#endif
#ifdef DENORMAL_TRAP
    enable_denorm_trap();
#ifdef FLP_EXCEPTION_TRAP
    enable_float_exception_trap( FLE_MASK_DENORM | FLE_MASK_UNDERFLOW );
#endif

    hHrtfBinary.hHrtfTD = NULL;         /* just to avoid compilation warning */
+3 −3
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@
#include "debug.h"
#endif
#include "wmc_auto.h"
#ifdef DENORMAL_TRAP
#ifdef FLP_EXCEPTION_TRAP
#include "flp_debug.h"
#endif

@@ -208,8 +208,8 @@ int main(
    reset_wmops();
    reset_mem( USE_BYTES );
#endif
#ifdef DENORMAL_TRAP
    enable_denorm_trap();
#ifdef FLP_EXCEPTION_TRAP
    /* guard must match the option name in options.h, otherwise the trap is silently never enabled */
    enable_float_exception_trap( FLE_MASK_DENORM | FLE_MASK_UNDERFLOW );
#endif

    /*------------------------------------------------------------------------------------------*
+3 −3
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@
#include "debug.h"
#endif
#include "wmc_auto.h"
#ifdef DENORMAL_TRAP
#ifdef FLP_EXCEPTION_TRAP
#include "flp_debug.h"
#endif

@@ -726,8 +726,8 @@ int main(
    reset_wmops();
    reset_mem( USE_BYTES );
#endif
#ifdef DENORMAL_TRAP
    enable_denorm_trap();
#ifdef FLP_EXCEPTION_TRAP
    enable_float_exception_trap( FLE_MASK_DENORM | FLE_MASK_UNDERFLOW );
#endif

    for ( i = 0; i < RENDERER_MAX_MASA_INPUTS; ++i )

lib_com/options.h

100644 → 100755
+1 −1
Original line number Diff line number Diff line
@@ -55,7 +55,7 @@
/*#define WMOPS_DETAIL*/                        /* Output detailed complexity printout for every function. Increases runtime overhead */
/*#define WMOPS_WC_FRAME_ANALYSIS*/             /* Output detailed complexity analysis for the worst-case frame */
/*#define MEM_COUNT_DETAILS*/                   /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */
/*#define DENORMAL_TRAP*/                       /* Enable trap for denormals */
/*#define FLP_EXCEPTION_TRAP*/                  /* Enable trap for floating-point exceptions (e.g., denormals, underflow, overflow, ...) */

#ifdef DEBUGGING
/*#define DBG_BITSTREAM_ANALYSIS*/              /* Write bitstream with annotations to a text file */

lib_debug/flp_debug.h

100755 → 100644
+202 −41
Original line number Diff line number Diff line
@@ -40,6 +40,12 @@
#include <xmmintrin.h>
#endif

#define FLE_MASK_INVALID   0x080
#define FLE_MASK_DENORM    0x100
#define FLE_MASK_DIV_ZERO  0x200
#define FLE_MASK_OVERFLOW  0x400
#define FLE_MASK_UNDERFLOW 0x800

/*
   detect underflow exception, which results in a denormal;
   this will not detect each and every denormal - otherwise,
@@ -48,55 +54,210 @@
*/


/*
   Unmask (i.e. enable trapping of) the floating-point exceptions selected by
   fle_mask, a bitwise OR of the FLE_MASK_* flags. Exceptions whose flag is
   not set keep their current mask state.

   Handled toolchains/targets: MSVC (_controlfp), GCC/Clang on x86 with
   SSE/AVX (MXCSR) and AArch64 (FPCR). On any other platform only a message
   is written to stderr.
*/
static inline void enable_float_exception_trap( uint32_t fle_mask )
{
#if defined( _MSC_VER )

    // MSVC, x87: a cleared _EM_* bit means the exception is unmasked
    unsigned int cw = _controlfp( 0, 0 );

    if ( fle_mask & FLE_MASK_INVALID )
    {
        cw &= ~_EM_INVALID;
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        cw &= ~_EM_DENORMAL;
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        cw &= ~_EM_ZERODIVIDE;
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        cw &= ~_EM_OVERFLOW;
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        cw &= ~_EM_UNDERFLOW;
    }

    _controlfp( cw, _MCW_EM );

#elif defined( __GNUC__ ) && ( defined( __SSE__ ) || defined( __SSE2__ ) || defined( __AVX__ ) )

    // GCC/Clang, x86 SSE/AVX: a cleared _MM_MASK_* bit means the exception is unmasked.
    // Only the exceptions requested in fle_mask are touched; underflow is no longer
    // unmasked unconditionally.
    unsigned int mx = _mm_getcsr();

    if ( fle_mask & FLE_MASK_INVALID )
    {
        mx &= ~_MM_MASK_INVALID;
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        mx &= ~_MM_MASK_DENORM;
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        mx &= ~_MM_MASK_DIV_ZERO;
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        mx &= ~_MM_MASK_OVERFLOW;
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        mx &= ~_MM_MASK_UNDERFLOW;
    }

    _mm_setcsr( mx );

#elif defined( __aarch64__ )

    // AArch64 (e.g., Apple Silicon): a set trap-enable bit in FPCR means the exception is unmasked
    uint64_t fpcr;
    __asm__ volatile( "mrs %0, fpcr"
                      : "=r"( fpcr ) );

    // clear bits 24 (FZ) & 25 (DN) --> allow denormals to happen
    fpcr &= ~( ( 1ull << 24 ) | ( 1ull << 25 ) );

    if ( fle_mask & FLE_MASK_INVALID )
    {
        // set bit 8 (IOE) to unmask invalid operations exceptions
        fpcr |= ( 1ull << 8 );
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        // set bit 15 (IDE) to unmask input denormal exceptions
        fpcr |= ( 1ull << 15 );
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        // set bit 9 (DZE) to unmask div_zero exceptions
        fpcr |= ( 1ull << 9 );
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        // set bit 10 (OFE) to unmask overflow exceptions
        fpcr |= ( 1ull << 10 );
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        // set bit 11 (UFE) to unmask underflow exceptions
        fpcr |= ( 1ull << 11 );
    }

    __asm__ volatile( "msr fpcr, %0" ::"r"( fpcr ) );

#else

    (void) fle_mask;
    fprintf( stderr, "enable_float_exception_trap() not supported on platform!\n" );

#endif
}

/*
   Mask (i.e. disable trapping of) the floating-point exceptions selected by
   fle_mask, a bitwise OR of the FLE_MASK_* flags. Exceptions whose flag is
   not set keep their current mask state.

   Handled toolchains/targets: MSVC (_controlfp), GCC/Clang on x86 with
   SSE/AVX (MXCSR) and AArch64 (FPCR). On any other platform only a message
   is written to stderr.
*/
static inline void disable_float_exception_trap( uint32_t fle_mask )
{
#if defined( _MSC_VER )

    // MSVC, x87: a set _EM_* bit means the exception is masked
    unsigned int cw = _controlfp( 0, 0 );

    if ( fle_mask & FLE_MASK_INVALID )
    {
        cw |= _EM_INVALID;
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        cw |= _EM_DENORMAL;
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        cw |= _EM_ZERODIVIDE;
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        cw |= _EM_OVERFLOW;
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        cw |= _EM_UNDERFLOW;
    }

    _controlfp( cw, _MCW_EM );

#elif defined( __GNUC__ ) && ( defined( __SSE__ ) || defined( __SSE2__ ) || defined( __AVX__ ) )

    // GCC/Clang, x86 SSE/AVX: a set _MM_MASK_* bit means the exception is masked.
    // Only the exceptions requested in fle_mask are touched; underflow is no longer
    // masked unconditionally.
    unsigned int mx = _mm_getcsr();

    if ( fle_mask & FLE_MASK_INVALID )
    {
        mx |= _MM_MASK_INVALID;
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        mx |= _MM_MASK_DENORM;
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        mx |= _MM_MASK_DIV_ZERO;
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        mx |= _MM_MASK_OVERFLOW;
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        mx |= _MM_MASK_UNDERFLOW;
    }

    _mm_setcsr( mx );

#elif defined( __aarch64__ )

    // AArch64 (e.g., Apple Silicon): a cleared trap-enable bit in FPCR means the exception is masked
    uint64_t fpcr;
    __asm__ volatile( "mrs %0, fpcr"
                      : "=r"( fpcr ) );

    if ( fle_mask & FLE_MASK_INVALID )
    {
        // unset bit 8 (IOE) to mask invalid operations exceptions
        fpcr &= ~( 1ull << 8 );
    }
    if ( fle_mask & FLE_MASK_DENORM )
    {
        // unset bit 15 (IDE) to mask input denormal exceptions
        fpcr &= ~( 1ull << 15 );
    }
    if ( fle_mask & FLE_MASK_DIV_ZERO )
    {
        // unset bit 9 (DZE) to mask div_zero exceptions
        fpcr &= ~( 1ull << 9 );
    }
    if ( fle_mask & FLE_MASK_OVERFLOW )
    {
        // unset bit 10 (OFE) to mask overflow exceptions
        fpcr &= ~( 1ull << 10 );
    }
    if ( fle_mask & FLE_MASK_UNDERFLOW )
    {
        // unset bit 11 (UFE) to mask underflow exceptions
        fpcr &= ~( 1ull << 11 );
    }

    // set bits 24/25 (FZ/DN) again
    // NOTE(review): FZ/DN are set unconditionally, not restored to the state they had
    // before enable_float_exception_trap() cleared them - confirm this is intended
    fpcr |= ( 1ull << 24 ) | ( 1ull << 25 );
    fprintf( stderr, "float_exception_trap(): Setting bits 24/25 (FZ/DN) again\n" );

    __asm__ volatile( "msr fpcr, %0" ::"r"( fpcr ) );

#else

    (void) fle_mask;
    fprintf( stderr, "disable_float_exception_trap() not supported on platform!\n" );

#endif
}