// Work in progress - not integrated in the project yet.
//
// Origin: patch 42d8b87dc05a344366cdaf961b1a91bdf5a29f98
//   "[WIP] Add direct parsing of strings into fixed-point values"
//   Kacper Sagnowski, Tue, 28 Oct 2025 17:33:11 +0100
//   Target path: lib_util/parse_fx.c
//
// Test with:
//
//   cc -O0 -g -DPARSE_FX_SELF_TEST -o parse-fx ./lib_util/parse_fx.c && ./parse-fx
//
// The self-test main() is only compiled when PARSE_FX_SELF_TEST is defined,
// so that this file can be linked into a program that has its own main().
//

#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEC_SEP '.'

// Largest number of fractional bits that makes sense for an int32_t result
#define FX_MAX_Q 31

// Number of leading fractional decimal digits that take part in the
// conversion. 10^18 is the largest power of ten whose double still fits
// in a uint64_t, which keeps the long division below free of overflow.
#define FX_MAX_FRAC_DIGITS 18

typedef enum {
    PARSE_ERR_OK = 0,
    PARSE_ERR_INVALID_STR,
    PARSE_ERR_OUT_OF_RANGE,
} ParseErr;

/*---------------------------------------------------------------------*
 * fx_is_digit()
 *
 * True if ch is a decimal digit. The cast to unsigned char is required
 * because passing a negative char value to isdigit() is undefined.
 *---------------------------------------------------------------------*/
static bool fx_is_digit( char ch )
{
    return isdigit( (unsigned char)ch ) != 0;
}

/*---------------------------------------------------------------------*
 * fx_frac_to_fixed()
 *
 * Converts the decimal digits in [frac_begin, frac_end) - the digits
 * following the decimal separator - to round( 0.digits * 2^Q ), rounding
 * halves up. Q must already be validated to lie in [0, FX_MAX_Q].
 *
 * Only the first FX_MAX_FRAC_DIGITS digits are used; any further digits
 * are ignored (truncated).
 *
 * The result is at most 2^Q (reached when the fraction rounds up to 1.0).
 *---------------------------------------------------------------------*/
static uint64_t fx_frac_to_fixed( const char* frac_begin, const char* frac_end, int16_t Q )
{
    uint64_t remainder = 0; // numerator of the fraction, always < scale
    uint64_t scale = 1;     // 10^(number of digits consumed)
    int n_digits = 0;

    for ( const char* c = frac_begin; c != frac_end && n_digits < FX_MAX_FRAC_DIGITS; ++c, ++n_digits )
    {
        remainder = remainder * 10 + (uint64_t)( *c - '0' );
        scale *= 10;
    }

    // Binary long division: produce one result bit per iteration.
    // remainder < scale <= 10^18, so remainder * 2 cannot overflow.
    uint64_t fx = 0;
    for ( int16_t bit = 0; bit < Q; ++bit )
    {
        remainder *= 2;
        fx <<= 1;
        if ( remainder >= scale )
        {
            remainder -= scale;
            fx |= 1u;
        }
    }

    // Round half up on what is left below the last bit
    if ( remainder * 2 >= scale )
    {
        ++fx;
    }

    return fx;
}

/*---------------------------------------------------------------------*
 * parse_chars_to_fx()
 *
 * Parses the character range [begin, end) to a fixed-point number with
 * Q fractional bits.
 *
 * Accepted format (a subset of what atof accepts):
 *   - optional leading whitespace
 *   - optional '+' or '-'
 *   - decimal digits with at most one DEC_SEP; at least one digit must
 *     be present before or after the separator
 * Exponents are not supported (TODO?). Any other character, including
 * trailing whitespace, makes the input invalid. No character at or
 * beyond end is ever read, so the range need not be null-terminated.
 *
 * The magnitude is scaled by 2^Q and rounded half up, then the sign is
 * applied.
 *
 * begin, end and fx_out must be non-null.
 *
 * Returns:
 *   PARSE_ERR_OK           - *fx_out holds the result
 *   PARSE_ERR_INVALID_STR  - input does not match the format above
 *   PARSE_ERR_OUT_OF_RANGE - Q is outside [0, FX_MAX_Q] or the scaled
 *                            value does not fit in int32_t
 * *fx_out is left untouched on error.
 *---------------------------------------------------------------------*/
ParseErr parse_chars_to_fx( const char* begin, const char *end, int16_t Q, int32_t* fx_out )
{
    if ( Q < 0 || Q > FX_MAX_Q )
    {
        return PARSE_ERR_OUT_OF_RANGE;
    }

    // Skip whitespace
    while ( begin < end && isspace( (unsigned char)*begin ) )
    {
        ++begin;
    }

    //
    // Validate format
    //
    const char* c = begin;

    // Optional plus or minus sign
    bool neg = false;
    if ( c < end && ( *c == '-' || *c == '+' ) )
    {
        neg = ( *c == '-' );
        ++c;
    }

    // Digits before the decimal separator (may be empty)
    const char* const int_begin = c;
    while ( c < end && fx_is_digit( *c ) )
    {
        ++c;
    }
    const char* const int_end = c;

    // Optional decimal separator and the digits after it (may be empty)
    const char* frac_begin = c;
    const char* frac_end = c;
    if ( c < end && *c == DEC_SEP )
    {
        ++c;
        frac_begin = c;
        while ( c < end && fx_is_digit( *c ) )
        {
            ++c;
        }
        frac_end = c;
    }

    // At least one digit is required somewhere
    if ( int_begin == int_end && frac_begin == frac_end )
    {
        return PARSE_ERR_INVALID_STR;
    }

    // Disallow invalid trailing characters (this also rejects a second separator)
    if ( c != end )
    {
        return PARSE_ERR_INVALID_STR;
    }

    //
    // Parse value
    //

    // Largest representable magnitude: 2^31 for negative values, 2^31 - 1 otherwise
    const uint64_t max_abs = neg ? (uint64_t)INT32_MAX + 1u : (uint64_t)INT32_MAX;

    // Integer part
    uint64_t integer = 0;
    for ( c = int_begin; c != int_end; ++c )
    {
        const uint64_t digit = (uint64_t)( *c - '0' );

        // Equivalent to integer * 10 + digit > max_abs, without overflow
        if ( integer > ( max_abs - digit ) / 10 )
        {
            return PARSE_ERR_OUT_OF_RANGE;
        }
        integer = integer * 10 + digit;
    }
    if ( integer > ( max_abs >> Q ) )
    {
        return PARSE_ERR_OUT_OF_RANGE;
    }
    integer <<= Q;

    // Fractional part. Rounding may carry into the integer part, so the sum
    // has to be range-checked again.
    const uint64_t fractional = fx_frac_to_fixed( frac_begin, frac_end, Q );
    const uint64_t magnitude = integer + fractional;
    if ( magnitude > max_abs )
    {
        return PARSE_ERR_OUT_OF_RANGE;
    }

    const int64_t result = neg ? -(int64_t)magnitude : (int64_t)magnitude;
    assert( INT32_MIN <= result && result <= INT32_MAX );
    *fx_out = (int32_t)result;

    return PARSE_ERR_OK;
}

/*---------------------------------------------------------------------*
 * parse_str_to_fx()
 *
 * Parses a null-terminated string to a fixed-point number with Q
 * fractional bits. See parse_chars_to_fx() for the accepted format and
 * the meaning of the return value.
 *---------------------------------------------------------------------*/
ParseErr parse_str_to_fx( const char* s, int16_t Q, int32_t* fx_out )
{
    return parse_chars_to_fx( s, s + strlen( s ), Q, fx_out );
}

#ifdef PARSE_FX_SELF_TEST

#define ARR_COUNT( array ) ( sizeof( ( array ) ) / sizeof( *( array ) ) )

typedef struct {
    const char* s;
    ParseErr expect_err;
    int16_t Q;
    int32_t expect_fx;
} TestCase;

/*---------------------------------------------------------------------*
 * main()
 *
 * Self-test: runs every test case through parse_str_to_fx(), prints one
 * line per case and returns EXIT_FAILURE if any case fails.
 *---------------------------------------------------------------------*/
int main( void )
{
    static const TestCase T[] = {
        // invalid strings
        { "abc", .expect_err = PARSE_ERR_INVALID_STR },
        { ".", .expect_err = PARSE_ERR_INVALID_STR },
        { "-", .expect_err = PARSE_ERR_INVALID_STR },
        { "+", .expect_err = PARSE_ERR_INVALID_STR },
        { "+.", .expect_err = PARSE_ERR_INVALID_STR },
        { "5894u", .expect_err = PARSE_ERR_INVALID_STR },
        { "1+", .expect_err = PARSE_ERR_INVALID_STR },
        { "10.i", .expect_err = PARSE_ERR_INVALID_STR },
        { "289.-", .expect_err = PARSE_ERR_INVALID_STR },
        { "", .expect_err = PARSE_ERR_INVALID_STR },
        { "1.2.3", .expect_err = PARSE_ERR_INVALID_STR },
        // integers
        { "1", .Q = 0, .expect_fx = 1 },
        { "000", .Q = 0, .expect_fx = 0 },
        { "1234", .Q = 0, .expect_fx = 1234 },
        { "2.", .Q = 0, .expect_fx = 2 },
        { "3.0", .Q = 0, .expect_fx = 3 },
        { "-19", .Q = 0, .expect_fx = -19 },
        { "+42", .Q = 0, .expect_fx = 42 },
        { "+972.", .Q = 0, .expect_fx = 972 },
        { "-062.", .Q = 0, .expect_fx = -62 },
        // basic fractionals
        { "0.5", .Q = 1, .expect_fx = 1 },
        { "0.5", .Q = 2, .expect_fx = 2 },
        { "-0.5", .Q = 1, .expect_fx = -1 },
        { "-0.5", .Q = 2, .expect_fx = -2 },
        { "0.125", .Q = 10, .expect_fx = 128 },
        // more fractionals
        { ".23479865", .Q = 11, .expect_fx = 481 },
        { "+.16", .Q = 5, .expect_fx = 5 },
        { "-.52", .Q = 31, .expect_fx = -1116691497 },
        { "+9.9999", .Q = 20, .expect_fx = 10485655 },
        { "-99999.99", .Q = 11, .expect_fx = -204799980 },
        // long fractionals - more digits than fit in 32 or 64 bits
        { "0.50000000000", .Q = 1, .expect_fx = 1 },
        { "0.25000000000000000000000", .Q = 2, .expect_fx = 1 },
        // range checks - exact bounds in Q0
        { "2147483647", .Q = 0, .expect_fx = 2147483647 },
        { "2147483648", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-2147483648", .Q = 0, .expect_fx = INT32_MIN },
        { "-2147483649", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        // range checks - exact bounds in Q1
        { "1073741823", .Q = 1, .expect_fx = 2147483646 },
        { "1073741824", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-1073741824", .Q = 1, .expect_fx = INT32_MIN },
        { "-1073741825", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        // range checks - way over bounds in Q0
        { "21474836470", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "21474836489", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-21474836487", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-21474836497", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        // range checks - way over bounds in Q1
        { "2147483647", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "2147483648", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-2147483648", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-2147483649", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        // range checks - rounding of the fraction carries out of range
        { "2147483647.5", .Q = 0, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "1073741823.9", .Q = 1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "0.9999999999", .Q = 31, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "-0.9999999999", .Q = 31, .expect_fx = INT32_MIN },
        // unsupported Q formats
        { "1", .Q = -1, .expect_err = PARSE_ERR_OUT_OF_RANGE },
        { "1", .Q = 32, .expect_err = PARSE_ERR_OUT_OF_RANGE },
    };

    size_t n_failed = 0;

    for ( size_t i = 0; i < ARR_COUNT( T ); ++i )
    {
        const TestCase* t = &T[i];
        printf( "\"%s\" with Q%d", t->s, t->Q );

        int32_t x = 0;
        const ParseErr err = parse_str_to_fx( t->s, t->Q, &x );

        if ( err != t->expect_err )
        {
            printf( "\nExpected error %d, got %d\n", (int)t->expect_err, (int)err );
            ++n_failed;
            continue;
        }

        if ( t->expect_err == PARSE_ERR_OK && x != t->expect_fx )
        {
            printf( "\nExpected result %" PRId32 ", got %" PRId32 "\n", t->expect_fx, x );
            ++n_failed;
            continue;
        }

        printf( " - OK\n" );
    }

    return n_failed == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

#endif // PARSE_FX_SELF_TEST