Commit 42d8b87d authored by sagnowski
Browse files

[WIP] Add direct parsing of strings into fixed-point values

parent 2d75a92b
Loading
Loading
Loading
Loading
Loading

lib_util/parse_fx.c

0 → 100644
+277 −0
Original line number Diff line number Diff line
// Work in progress - not integrated in the project yet.
//
// Test with:
//
//  cc -O0 -g -o parse-fx ./lib_util/parse_fx.c && ./parse-fx
//

#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Character that separates the integer part from the fractional part.
#define DEC_SEP '.'

// Result codes of the fixed-point parsing functions.
typedef enum {
    PARSE_ERR_OK = 0,       // input was parsed and the result was stored
    PARSE_ERR_INVALID_STR,  // input is not a well-formed decimal number
    PARSE_ERR_OUT_OF_RANGE, // value (or Q) is outside the representable range
} ParseErr;

/*---------------------------------------------------------------------*
 * fx_round_fraction()
 *
 * Returns ( numerator / denominator ) * 2^bits, rounded to the nearest
 * integer with ties rounded up.
 *
 * Requires numerator < denominator < 2^63 and bits <= 63. The quotient
 * is produced one bit at a time (binary long division), so the wide
 * product numerator * 2^bits never has to be formed.
 *---------------------------------------------------------------------*/
static uint64_t fx_round_fraction( uint64_t numerator, uint64_t denominator, unsigned bits )
{
    uint64_t quotient = 0;
    uint64_t remainder = numerator;

    for ( unsigned i = 0; i < bits; ++i )
    {
        // remainder < denominator < 2^63, so doubling cannot overflow
        remainder <<= 1;
        quotient <<= 1;
        if ( remainder >= denominator )
        {
            remainder -= denominator;
            quotient |= 1u;
        }
    }

    // Round to nearest: the discarded part is remainder / denominator
    if ( ( remainder << 1 ) >= denominator )
    {
        ++quotient;
    }
    return quotient;
}

/*---------------------------------------------------------------------*
 * parse_chars_to_fx()
 *
 * Parses the character range [begin, end) to a signed 32-bit
 * fixed-point number with Q fractional bits.
 *
 * Accepted syntax: optional leading whitespace, optional '+' or '-',
 * then a non-empty sequence of decimal digits that may contain one
 * DEC_SEP. Exponents are not supported and no characters (not even
 * whitespace) may follow the number. Characters at or beyond `end`
 * are never read.
 *
 * The magnitude is rounded to the nearest representable value, ties
 * away from zero. Only the first 18 fractional digits take part in
 * the conversion; further digits are validated but otherwise ignored.
 *
 * begin, end  half-open range of characters to parse
 * Q           number of fractional bits, 0..63
 * fx_out      receives the result; written only on PARSE_ERR_OK,
 *             must not be NULL
 *
 * Returns PARSE_ERR_OK on success, PARSE_ERR_INVALID_STR when the
 * range is NULL, reversed or not a well-formed number, and
 * PARSE_ERR_OUT_OF_RANGE when Q is unsupported or the (rounded) value
 * does not fit into int32_t.
 *---------------------------------------------------------------------*/
ParseErr parse_chars_to_fx( const char* begin, const char *end, int16_t Q, int32_t* fx_out )
{
    enum {
        MAX_Q = 63,           // shifting a 64-bit value by 64 or more is undefined
        MAX_FRAC_DIGITS = 18, // 10^18 < 2^60 keeps fx_round_fraction() overflow-free
    };

    bool neg = false;
    const char* int_begin = NULL;
    const char* dec_sep = NULL;
    const char* frac_begin = NULL;

    if ( begin == NULL || end == NULL || end < begin )
    {
        return PARSE_ERR_INVALID_STR;
    }

    // Skip whitespace (cast: <ctype.h> functions are undefined for negative char values)
    while ( begin != end && isspace( (unsigned char)*begin ) )
    {
        ++begin;
    }

    //
    // Validate format - based on documentation of atof from the C std lib
    //
    const char* c = begin;

    // Optional plus or minus sign
    if ( c != end && ( *c == '-' || *c == '+' ) )
    {
        neg = ( *c == '-' );
        ++c;
    }

    // Non-empty sequence of decimal digits optionally containing decimal-point character
    for ( ; c != end; ++c )
    {
        if ( isdigit( (unsigned char)*c ) )
        {
            if ( int_begin == NULL && dec_sep == NULL )
            {
                int_begin = c;
            }
            else if ( frac_begin == NULL && dec_sep != NULL )
            {
                frac_begin = c;
            }
        }
        else if ( *c == DEC_SEP )
        {
            if ( dec_sep != NULL )
            {
                return PARSE_ERR_INVALID_STR;
            }
            dec_sep = c;
        }
        else
        {
            break;
        }
    }
    if ( int_begin == NULL && frac_begin == NULL )
    {
        return PARSE_ERR_INVALID_STR;
    }

    // Optional e or E followed with optional minus or plus sign and nonempty sequence of decimal digits
    // TODO?

    // Disallow invalid trailing characters
    if ( c != end )
    {
        return PARSE_ERR_INVALID_STR;
    }

    // The string is well-formed; now make sure the shifts below are defined
    if ( Q < 0 || Q > MAX_Q )
    {
        return PARSE_ERR_OUT_OF_RANGE;
    }

    //
    // Parse value
    //

    // Largest magnitude that fits into int32_t for the parsed sign
    const uint64_t max_abs = neg ? (uint64_t)INT32_MAX + 1u : (uint64_t)INT32_MAX;

    // Integer part
    uint64_t integer = 0;
    if ( int_begin != NULL )
    {
        const char* int_end = dec_sep != NULL ? dec_sep : end;

        for ( c = int_begin; c != int_end; ++c )
        {
            if ( ( max_abs / 10 ) < integer )
            {
                return PARSE_ERR_OUT_OF_RANGE;
            }
            integer *= 10;

            const uint64_t digit = (uint64_t)( *c - '0' );
            if ( max_abs - digit < integer )
            {
                return PARSE_ERR_OUT_OF_RANGE;
            }
            integer += digit;
        }
        if ( ( max_abs >> Q ) < integer )
        {
            return PARSE_ERR_OUT_OF_RANGE;
        }
        integer <<= Q;
    }

    // Fractional part
    uint64_t fractional = 0;
    if ( frac_begin != NULL )
    {
        uint64_t numerator = 0;
        uint64_t scale = 1;
        int used_digits = 0;

        // Everything from frac_begin up to end has been validated as digits above
        for ( c = frac_begin; c != end && used_digits < MAX_FRAC_DIGITS; ++c, ++used_digits )
        {
            scale *= 10;
            numerator = ( numerator * 10 ) + (uint64_t)( *c - '0' );
        }
        fractional = fx_round_fraction( numerator, scale, (unsigned)Q );
    }

    // integer <= 2^31 and fractional <= 2^63, so the sum cannot wrap.
    // Rounding the fraction up may still push the value past the limit.
    const uint64_t magnitude = integer + fractional;
    if ( magnitude > max_abs )
    {
        return PARSE_ERR_OUT_OF_RANGE;
    }

    *fx_out = neg ? (int32_t)( -(int64_t)magnitude ) : (int32_t)magnitude;

    return PARSE_ERR_OK;
}

/*---------------------------------------------------------------------*
 * parse_str_to_fx()
 *
 * Parses a null-terminated string to a fixed-point number
 *---------------------------------------------------------------------*/
ParseErr parse_str_to_fx( const char* s, int16_t Q, int32_t* fx_out )
{
    const char* end = s;
    while ( *end != '\0' ) {
        ++end;
    }
    return parse_chars_to_fx( s, end, Q, fx_out );
}

// Number of elements in a true array (must not be used on a pointer)
#define ARR_COUNT( array ) ( sizeof( array ) / sizeof( ( array )[0] ) )

typedef struct {
    const char* s;
    ParseErr expect_err;
    int16_t Q;
    int32_t expect_fx;
} TestCase;

int main(int argc, char** argv)
{
    (void)argc, (void)argv;

    TestCase T[] = {
        // invalid strings
        { "abc", .expect_err = PARSE_ERR_INVALID_STR },
        { ".", .expect_err = PARSE_ERR_INVALID_STR },
        { "-", .expect_err = PARSE_ERR_INVALID_STR },
        { "+", .expect_err = PARSE_ERR_INVALID_STR },
        { "+.", .expect_err = PARSE_ERR_INVALID_STR },
        { "5894u", .expect_err = PARSE_ERR_INVALID_STR },
        { "1+", .expect_err = PARSE_ERR_INVALID_STR },
        { "10.i", .expect_err = PARSE_ERR_INVALID_STR },
        { "289.-", .expect_err = PARSE_ERR_INVALID_STR },
        // integers
        { "1", .Q = 0, .expect_fx = 1 },
        {"000", .Q = 0, .expect_fx = 0 },
        { "1234", .Q = 0, .expect_fx = 1234 },
        { "2.", .Q = 0, .expect_fx = 2 },
        { "3.0", .Q = 0, .expect_fx = 3 },
        { "-19", .Q = 0, .expect_fx = -19 },
        { "+42", .Q = 0, .expect_fx = 42 },
        { "+972.", .Q = 0, .expect_fx = 972 },
        { "-062.", .Q = 0, .expect_fx = -62 },
        // basic fractionals
        { "0.5", .Q = 1, .expect_fx = 1 },
        { "0.5", .Q = 2, .expect_fx = 2 },
        { "-0.5", .Q = 1, .expect_fx = -1 },
        { "-0.5", .Q = 2, .expect_fx = -2 },
        { "0.125", .Q = 10, .expect_fx = 128 },
        // more fractionals
        { ".23479865", .Q = 11, .expect_fx = 481 },
        { "+.16", .Q = 5, .expect_fx = 5 },
        { "-.52", .Q = 31, .expect_fx = -1116691497 },
        { "+9.9999", .Q = 20, .expect_fx = 10485655 },
        { "-99999.99", .Q = 11, .expect_fx = -204799980 },
        // range checks - exact bounds in Q0
        { "2147483647", .Q = 0, .expect_fx = 2147483647 },
        { "2147483648", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-2147483648", .Q = 0, .expect_fx = -2147483648 },
        { "-2147483649", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        // range checks - exact bounds in Q1
        { "1073741823", .Q = 1, .expect_fx = 2147483646 },
        { "1073741824", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-1073741824", .Q = 1, .expect_fx = -2147483648 },
        { "-1073741825", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        // range checks - way over bounds in Q0
        { "21474836470", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "21474836489", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-21474836487", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-21474836497", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        // range checks - way over bounds in Q1
        { "2147483647", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "2147483648", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-2147483648", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-2147483649", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
    };

    for ( size_t i = 0; i < ARR_COUNT( T ); ++i )
    {
        TestCase t = T[i];
        printf( "\"%s\" with Q%d", t.s, t.Q );
        int32_t x;

        const ParseErr err = parse_str_to_fx( t.s, t.Q, &x );
        if ( err != t.expect_err )
        {
            printf( "\nExpected error %d, got %d\n", t.expect_err, err );
            assert( 0 );
        }

        if ( t.expect_err == PARSE_ERR_OK && x != t.expect_fx )
        {
            printf( "\nExpected result %d, got %d\n", t.expect_fx, x );
            assert( 0 );
        }
        printf(" - OK\n");
    }

    return 0;
}