From 53a1ac937d1eed49c3a183a91d8bb659d0abb116 Mon Sep 17 00:00:00 2001 From: patilsat Date: Sat, 25 Jan 2025 05:36:55 -0800 Subject: [PATCH 1/7] Update BASOP_Util_Divide3232_Scale_cadence() with additional parameter - bits --- lib_com/basop_util.c | 104 +++++++++++++++++++++++++++++++++++++++++++ lib_com/basop_util.h | 7 +++ 2 files changed, 111 insertions(+) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index cef466471..51c3ddf98 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -958,6 +958,55 @@ Word16 BASOP_Util_Divide3232_uu_1616_Scale( Word32 x, Word32 y, Word16 *s ) return ( z ); } +#ifdef DIV32_OPT +Word32 div_w( Word32 L_num, Word32 L_den, Word16 bits ) +{ + Word32 L_var_out = 0; + Word16 iteration; + move32(); + + + IF( L_den == 0 ) + { + /* printf("Division by 0 in div_l, Fatal error in "); printStack(); */ + return ( 0 ); + } + + test(); + IF( ( L_num < 0 ) || ( L_den < 0 ) ) + { + /* printf("Division Error in div_l, Fatal error in "); printStack(); */ + return ( 0 ); + } + Word64 W_num, W_den; + W_num = W_deposit32_l( L_num ); + W_den = W_deposit32_l( L_den ); + + IF( GT_64( W_num, W_den ) ) + { + return MAX_32; + } + ELSE + { + W_num = W_shr( W_num, 1 ); + W_den = W_shr( W_den, 1 ); + + FOR( iteration = 0; iteration < bits; iteration++ ) + { + L_var_out = L_shl( L_var_out, 1 ); + W_num = W_shl( W_num, 1 ); + + IF( GT_64( W_num, W_den ) ) + { + W_num = W_sub( W_num, W_den ); + L_var_out = L_add( L_var_out, 1 ); + } + } + + return L_shl(L_var_out, 31-bits); + } +} +#else /* DIV32_OPT */ Word32 div_w( Word32 L_num, Word32 L_den ) { Word32 L_var_out = 0; @@ -1005,7 +1054,9 @@ Word32 div_w( Word32 L_num, Word32 L_den ) return L_var_out; } } +#endif /* DIV32_OPT */ +#ifndef DIV32_OPT Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { Word32 z; @@ -1057,6 +1108,59 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) return z; } +#else /* DIV32_OPT */ +Word32 
BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s, Word16 bits ) +{ + Word32 z; + Word16 sx; + Word16 sy; + Word32 sign; + + /* assert (x >= (Word32)0); */ + assert( y != (Word32) 0 ); + + sign = 0; + move16(); + + IF( x < 0 ) + { + x = L_negate( x ); + sign = L_xor( sign, 1 ); + } + + IF( y < 0 ) + { + y = L_negate( y ); + sign = L_xor( sign, 1 ); + } + + IF( x == (Word32) 0 ) + { + *s = 0; + return ( (Word32) 0 ); + } + + sx = norm_l( x ); + x = L_shl( x, sx ); + x = L_shr( x, 1 ); + move16(); + *s = sub( 1, sx ); + + sy = norm_l( y ); + y = L_shl( y, sy ); + move16(); + *s = add( *s, sy ); + + z = div_w( x, y); + + if ( sign != 0 ) + { + z = L_negate( z ); + } + + return z; +} +#endif /* DIV32_OPT */ Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s ) { diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 92994542e..243327164 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -328,9 +328,16 @@ Word16 BASOP_Util_Divide3232_Scale( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ +#ifdef DIV32_OPT +Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ + Word32 y, /*!< i : Denominator*/ + Word16 *s, /*!< o : Additional scalefactor difference*/ + Word16 bits ); /*!< o : Additional scalefactor difference*/ +#else /* DIV32_OPT */ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ +#endif /* DIV32_OPT */ /************************************************************************/ -- GitLab From 7d6d31ec2330ffc157d5639bbe53f325c83a242f Mon Sep 17 00:00:00 2001 From: patilsat Date: Sat, 25 Jan 2025 06:31:47 -0800 Subject: [PATCH 2/7] simplify changes for fixed nbits=26 --- lib_com/basop32.h | 4 +++ lib_com/basop_util.c | 56 +++------------------------------------ lib_com/basop_util.h | 7 ----- lib_com/options.h | 
1 + lib_enc/ivas_mcmasa_enc.c | 5 +++- 5 files changed, 12 insertions(+), 61 deletions(-) diff --git a/lib_com/basop32.h b/lib_com/basop32.h index 96fd8d166..390607a0c 100644 --- a/lib_com/basop32.h +++ b/lib_com/basop32.h @@ -167,7 +167,11 @@ Word32 L_abs( Word32 L_var1 ); /* Long abs, Word32 DEPR_L_sat_co( Word32 L_var1, Flag Overflow, Flag Carry ); /* Long saturation, 4 */ Word16 norm_s( Word16 var1 ); /* Short norm, 1 */ Word16 div_s( Word16 var1, Word16 var2 ); /* Short division, 18 */ +#ifdef DIV32_OPT +Word32 div_w( Word32 L_num, Word32 L_den, Word16 nbits ); +#else Word32 div_w( Word32 L_num, Word32 L_den ); +#endif Word16 norm_l( Word32 L_var1 ); /* Long norm, 1 */ Word32 L_sat( Word32 L_var1 ); /* Long saturation, 4 */ diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 51c3ddf98..40923365e 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1056,7 +1056,6 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } #endif /* DIV32_OPT */ -#ifndef DIV32_OPT Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { Word32 z; @@ -1099,59 +1098,11 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) move16(); *s = add( *s, sy ); +#ifndef DIV32_OPT z = div_w( x, y ); - - if ( sign != 0 ) - { - z = L_negate( z ); - } - - return z; -} #else /* DIV32_OPT */ -Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s, Word16 bits ) -{ - Word32 z; - Word16 sx; - Word16 sy; - Word32 sign; - - /* assert (x >= (Word32)0); */ - assert( y != (Word32) 0 ); - - sign = 0; - move16(); - - IF( x < 0 ) - { - x = L_negate( x ); - sign = L_xor( sign, 1 ); - } - - IF( y < 0 ) - { - y = L_negate( y ); - sign = L_xor( sign, 1 ); - } - - IF( x == (Word32) 0 ) - { - *s = 0; - return ( (Word32) 0 ); - } - - sx = norm_l( x ); - x = L_shl( x, sx ); - x = L_shr( x, 1 ); - move16(); - *s = sub( 1, sx ); - - sy = norm_l( y ); - y = L_shl( y, sy ); - move16(); - *s = add( *s, sy ); - - z = div_w( x, y); + z = div_w( 
x, y, 26 ); +#endif /* DIV32_OPT */ if ( sign != 0 ) { @@ -1160,7 +1111,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s, Word1 return z; } -#endif /* DIV32_OPT */ Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s ) { diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 243327164..92994542e 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -328,16 +328,9 @@ Word16 BASOP_Util_Divide3232_Scale( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ -#ifdef DIV32_OPT -Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ - Word32 y, /*!< i : Denominator*/ - Word16 *s, /*!< o : Additional scalefactor difference*/ - Word16 bits ); /*!< o : Additional scalefactor difference*/ -#else /* DIV32_OPT */ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ -#endif /* DIV32_OPT */ /************************************************************************/ diff --git a/lib_com/options.h b/lib_com/options.h index e58555dfe..c8110501c 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -70,6 +70,7 @@ #define BASOP_NOGLOB_DECLARE_LOCAL #endif +#define DIV32_OPT #define IVAS_FLOAT_FIXED #define IVAS_FLOAT_FIXED_CONVERSIONS /* Temporary macro to keep track of intermediate flt to fixed and fixed to flt conversions */ #define MSAN_FIX diff --git a/lib_enc/ivas_mcmasa_enc.c b/lib_enc/ivas_mcmasa_enc.c index 34d43d366..4e22a65eb 100644 --- a/lib_enc/ivas_mcmasa_enc.c +++ b/lib_enc/ivas_mcmasa_enc.c @@ -2326,8 +2326,11 @@ static void computeLfeEnergy_fx( Copy32( data_fx[lfeChannelIndex] + hMcMasa->offset_comp, &( delayedInputSignal[0][hMcMasa->num_slots_delay_comp * l_ts] ), ( MDFT_NO_COL_MAX - hMcMasa->num_slots_delay_comp ) * l_ts ); // q_inp Copy32( &( hMcMasa->delay_buffer_lfe[1][0] ), &( delayedInputSignal[1][0] 
), hMcMasa->num_slots_delay_comp * l_ts ); // q_inp Copy32( data_fx[separateChannelIndex] + hMcMasa->offset_comp, &( delayedInputSignal[1][hMcMasa->num_slots_delay_comp * l_ts] ), ( MDFT_NO_COL_MAX - hMcMasa->num_slots_delay_comp ) * l_ts ); // q_inp - +#ifdef DIV32_OPT + lowpassCoef = L_shl( div_w( 1, (Word32) hMcMasa->ringBufferSize, 26 ), Q6 ); // Q.37(31+6) +#else lowpassCoef = L_shl( div_w( 1, (Word32) hMcMasa->ringBufferSize ), Q6 ); // Q.37(31+6) +#endif FOR( i = 0; i < input_frame; i++ ) { -- GitLab From af5904558167635f536562843f81a5bb9bb30bdb Mon Sep 17 00:00:00 2001 From: patilsat Date: Sat, 25 Jan 2025 22:35:36 -0800 Subject: [PATCH 3/7] Fix comparisons in div_w operator --- lib_com/basop_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 40923365e..0b2610540 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -982,7 +982,7 @@ Word32 div_w( Word32 L_num, Word32 L_den, Word16 bits ) W_num = W_deposit32_l( L_num ); W_den = W_deposit32_l( L_den ); - IF( GT_64( W_num, W_den ) ) + IF( GE_64( W_num, W_den ) ) { return MAX_32; } @@ -996,7 +996,7 @@ Word32 div_w( Word32 L_num, Word32 L_den, Word16 bits ) L_var_out = L_shl( L_var_out, 1 ); W_num = W_shl( W_num, 1 ); - IF( GT_64( W_num, W_den ) ) + if( GE_64( W_num, W_den ) ) { W_num = W_sub( W_num, W_den ); L_var_out = L_add( L_var_out, 1 ); -- GitLab From 29cb9086a92ec47c06ded68a140bc297f0b13aee Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 30 Jan 2025 16:36:35 +0100 Subject: [PATCH 4/7] added alternative 32/32 bit division using Newton/Raphson method with about 40 BASOP weights --- lib_com/basop_util.c | 406 +++++++++++++++++++++++++++++++++++++++++++ lib_com/options.h | 1 + 2 files changed, 407 insertions(+) mode change 100644 => 100755 lib_com/basop_util.c mode change 100644 => 100755 lib_com/options.h diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c old mode 100644 new mode 100755 index 0b2610540..bb98db494 
--- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1056,9 +1056,414 @@ Word32 div_w( Word32 L_num, Word32 L_den ) } #endif /* DIV32_OPT */ +#ifdef DIV32_OPT_NEWTON + +Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, /*!< i : Numerator*/ + Word32 y, /*!< i : Denominator*/ + Word16 *s ); /*!< o : Additional scalefactor difference*/ + + +/* + Table of 256 precalculated estimates to be used by the "div_w_newton" + function using the Newton/Raphson method. + Note: The first table value (for denominator near 0x40000000) is not fully + accurate and should not be used. + */ +Word32 division_lookup[256] = { + /* Precalculated rounded results for 0x40000000 / b with b in [0x40000000 ... 0x7FFFFFFF] */ + 0x7FFFFFFF, // 1.000000000000000 i=0 0.5 / 0.5+0/512 (b=0x40000000) + 0x7F807F80, // 0.996108949416342 i=1 0.5 / 0.5+1/512 (b=0x40400000) + 0x7F01FC07, // 0.992248062015504 i=2 0.5 / 0.5+2/512 (b=0x40800000) + 0x7E8472A8, // 0.988416988416988 i=3 0.5 / 0.5+3/512 (b=0x40C00000) + 0x7E07E07E, // 0.984615384615385 i=4 0.5 / 0.5+4/512 (b=0x41000000) + 0x7D8C42B2, // 0.980842911877395 i=5 0.5 / 0.5+5/512 (b=0x41400000) + 0x7D119679, // 0.977099236641221 i=6 0.5 / 0.5+6/512 (b=0x41800000) + 0x7C97D910, // 0.973384030418251 i=7 0.5 / 0.5+7/512 (b=0x41C00000) + 0x7C1F07C1, // 0.969696969696970 i=8 0.5 / 0.5+8/512 (b=0x42000000) + 0x7BA71FE1, // 0.966037735849057 i=9 0.5 / 0.5+9/512 (b=0x42400000) + 0x7B301ECC, // 0.962406015037594 i=10 0.5 / 0.5+10/512 (b=0x42800000) + 0x7ABA01EA, // 0.958801498127341 i=11 0.5 / 0.5+11/512 (b=0x42C00000) + 0x7A44C6AF, // 0.955223880597015 i=12 0.5 / 0.5+12/512 (b=0x43000000) + 0x79D06A96, // 0.951672862453532 i=13 0.5 / 0.5+13/512 (b=0x43400000) + 0x795CEB24, // 0.948148148148148 i=14 0.5 / 0.5+14/512 (b=0x43800000) + 0x78EA45E7, // 0.944649446494465 i=15 0.5 / 0.5+15/512 (b=0x43C00000) + 0x78787878, // 0.941176470588235 i=16 0.5 / 0.5+16/512 (b=0x44000000) + 0x78078078, // 0.937728937728938 i=17 0.5 / 0.5+17/512 (b=0x44400000) + 0x77975B8F, 
// 0.934306569343066 i=18 0.5 / 0.5+18/512 (b=0x44800000) + 0x77280772, // 0.930909090909091 i=19 0.5 / 0.5+19/512 (b=0x44C00000) + 0x76B981DA, // 0.927536231884058 i=20 0.5 / 0.5+20/512 (b=0x45000000) + 0x764BC88C, // 0.924187725631769 i=21 0.5 / 0.5+21/512 (b=0x45400000) + 0x75DED952, // 0.920863309352518 i=22 0.5 / 0.5+22/512 (b=0x45800000) + 0x7572B201, // 0.917562724014337 i=23 0.5 / 0.5+23/512 (b=0x45C00000) + 0x75075075, // 0.914285714285714 i=24 0.5 / 0.5+24/512 (b=0x46000000) + 0x749CB28F, // 0.911032028469751 i=25 0.5 / 0.5+25/512 (b=0x46400000) + 0x7432D63D, // 0.907801418439716 i=26 0.5 / 0.5+26/512 (b=0x46800000) + 0x73C9B971, // 0.904593639575972 i=27 0.5 / 0.5+27/512 (b=0x46C00000) + 0x73615A24, // 0.901408450704225 i=28 0.5 / 0.5+28/512 (b=0x47000000) + 0x72F9B658, // 0.898245614035088 i=29 0.5 / 0.5+29/512 (b=0x47400000) + 0x7292CC15, // 0.895104895104895 i=30 0.5 / 0.5+30/512 (b=0x47800000) + 0x722C996B, // 0.891986062717770 i=31 0.5 / 0.5+31/512 (b=0x47C00000) + 0x71C71C71, // 0.888888888888889 i=32 0.5 / 0.5+32/512 (b=0x48000000) + 0x71625344, // 0.885813148788927 i=33 0.5 / 0.5+33/512 (b=0x48400000) + 0x70FE3C07, // 0.882758620689655 i=34 0.5 / 0.5+34/512 (b=0x48800000) + 0x709AD4E4, // 0.879725085910653 i=35 0.5 / 0.5+35/512 (b=0x48C00000) + 0x70381C0E, // 0.876712328767123 i=36 0.5 / 0.5+36/512 (b=0x49000000) + 0x6FD60FBA, // 0.873720136518771 i=37 0.5 / 0.5+37/512 (b=0x49400000) + 0x6F74AE26, // 0.870748299319728 i=38 0.5 / 0.5+38/512 (b=0x49800000) + 0x6F13F596, // 0.867796610169492 i=39 0.5 / 0.5+39/512 (b=0x49C00000) + 0x6EB3E453, // 0.864864864864865 i=40 0.5 / 0.5+40/512 (b=0x4A000000) + 0x6E5478AC, // 0.861952861952862 i=41 0.5 / 0.5+41/512 (b=0x4A400000) + 0x6DF5B0F7, // 0.859060402684564 i=42 0.5 / 0.5+42/512 (b=0x4A800000) + 0x6D978B8E, // 0.856187290969900 i=43 0.5 / 0.5+43/512 (b=0x4AC00000) + 0x6D3A06D3, // 0.853333333333333 i=44 0.5 / 0.5+44/512 (b=0x4B000000) + 0x6CDD212B, // 0.850498338870432 i=45 0.5 / 0.5+45/512 
(b=0x4B400000) + 0x6C80D901, // 0.847682119205298 i=46 0.5 / 0.5+46/512 (b=0x4B800000) + 0x6C252CC7, // 0.844884488448845 i=47 0.5 / 0.5+47/512 (b=0x4BC00000) + 0x6BCA1AF2, // 0.842105263157895 i=48 0.5 / 0.5+48/512 (b=0x4C000000) + 0x6B6FA1FE, // 0.839344262295082 i=49 0.5 / 0.5+49/512 (b=0x4C400000) + 0x6B15C06B, // 0.836601307189543 i=50 0.5 / 0.5+50/512 (b=0x4C800000) + 0x6ABC74BE, // 0.833876221498371 i=51 0.5 / 0.5+51/512 (b=0x4CC00000) + 0x6A63BD81, // 0.831168831168831 i=52 0.5 / 0.5+52/512 (b=0x4D000000) + 0x6A0B9944, // 0.828478964401295 i=53 0.5 / 0.5+53/512 (b=0x4D400000) + 0x69B4069B, // 0.825806451612903 i=54 0.5 / 0.5+54/512 (b=0x4D800000) + 0x695D041D, // 0.823151125401929 i=55 0.5 / 0.5+55/512 (b=0x4DC00000) + 0x69069069, // 0.820512820512820 i=56 0.5 / 0.5+56/512 (b=0x4E000000) + 0x68B0AA1F, // 0.817891373801917 i=57 0.5 / 0.5+57/512 (b=0x4E400000) + 0x685B4FE5, // 0.815286624203822 i=58 0.5 / 0.5+58/512 (b=0x4E800000) + 0x68068068, // 0.812698412698413 i=59 0.5 / 0.5+59/512 (b=0x4EC00000) + 0x67B23A54, // 0.810126582278481 i=60 0.5 / 0.5+60/512 (b=0x4F000000) + 0x675E7C5D, // 0.807570977917981 i=61 0.5 / 0.5+61/512 (b=0x4F400000) + 0x670B453B, // 0.805031446540881 i=62 0.5 / 0.5+62/512 (b=0x4F800000) + 0x66B893A9, // 0.802507836990596 i=63 0.5 / 0.5+63/512 (b=0x4FC00000) + 0x66666666, // 0.800000000000000 i=64 0.5 / 0.5+64/512 (b=0x50000000) + 0x6614BC36, // 0.797507788161994 i=65 0.5 / 0.5+65/512 (b=0x50400000) + 0x65C393E0, // 0.795031055900621 i=66 0.5 / 0.5+66/512 (b=0x50800000) + 0x6572EC2F, // 0.792569659442725 i=67 0.5 / 0.5+67/512 (b=0x50C00000) + 0x6522C3F3, // 0.790123456790123 i=68 0.5 / 0.5+68/512 (b=0x51000000) + 0x64D319FE, // 0.787692307692308 i=69 0.5 / 0.5+69/512 (b=0x51400000) + 0x6483ED27, // 0.785276073619632 i=70 0.5 / 0.5+70/512 (b=0x51800000) + 0x64353C48, // 0.782874617737003 i=71 0.5 / 0.5+71/512 (b=0x51C00000) + 0x63E7063E, // 0.780487804878049 i=72 0.5 / 0.5+72/512 (b=0x52000000) + 0x639949EB, // 0.778115501519757 i=73 
0.5 / 0.5+73/512 (b=0x52400000) + 0x634C0634, // 0.775757575757576 i=74 0.5 / 0.5+74/512 (b=0x52800000) + 0x62FF3A01, // 0.773413897280967 i=75 0.5 / 0.5+75/512 (b=0x52C00000) + 0x62B2E43D, // 0.771084337349398 i=76 0.5 / 0.5+76/512 (b=0x53000000) + 0x626703D8, // 0.768768768768769 i=77 0.5 / 0.5+77/512 (b=0x53400000) + 0x621B97C2, // 0.766467065868264 i=78 0.5 / 0.5+78/512 (b=0x53800000) + 0x61D09EF3, // 0.764179104477612 i=79 0.5 / 0.5+79/512 (b=0x53C00000) + 0x61861861, // 0.761904761904762 i=80 0.5 / 0.5+80/512 (b=0x54000000) + 0x613C0309, // 0.759643916913947 i=81 0.5 / 0.5+81/512 (b=0x54400000) + 0x60F25DEA, // 0.757396449704142 i=82 0.5 / 0.5+82/512 (b=0x54800000) + 0x60A92806, // 0.755162241887906 i=83 0.5 / 0.5+83/512 (b=0x54C00000) + 0x60606060, // 0.752941176470588 i=84 0.5 / 0.5+84/512 (b=0x55000000) + 0x60180601, // 0.750733137829912 i=85 0.5 / 0.5+85/512 (b=0x55400000) + 0x5FD017F4, // 0.748538011695906 i=86 0.5 / 0.5+86/512 (b=0x55800000) + 0x5F889545, // 0.746355685131195 i=87 0.5 / 0.5+87/512 (b=0x55C00000) + 0x5F417D05, // 0.744186046511628 i=88 0.5 / 0.5+88/512 (b=0x56000000) + 0x5EFACE48, // 0.742028985507246 i=89 0.5 / 0.5+89/512 (b=0x56400000) + 0x5EB48823, // 0.739884393063584 i=90 0.5 / 0.5+90/512 (b=0x56800000) + 0x5E6EA9AE, // 0.737752161383285 i=91 0.5 / 0.5+91/512 (b=0x56C00000) + 0x5E293205, // 0.735632183908046 i=92 0.5 / 0.5+92/512 (b=0x57000000) + 0x5DE42046, // 0.733524355300860 i=93 0.5 / 0.5+93/512 (b=0x57400000) + 0x5D9F7390, // 0.731428571428571 i=94 0.5 / 0.5+94/512 (b=0x57800000) + 0x5D5B2B08, // 0.729344729344729 i=95 0.5 / 0.5+95/512 (b=0x57C00000) + 0x5D1745D1, // 0.727272727272727 i=96 0.5 / 0.5+96/512 (b=0x58000000) + 0x5CD3C315, // 0.725212464589235 i=97 0.5 / 0.5+97/512 (b=0x58400000) + 0x5C90A1FD, // 0.723163841807910 i=98 0.5 / 0.5+98/512 (b=0x58800000) + 0x5C4DE1B6, // 0.721126760563380 i=99 0.5 / 0.5+99/512 (b=0x58C00000) + 0x5C0B8170, // 0.719101123595506 i=100 0.5 / 0.5+100/512 (b=0x59000000) + 0x5BC9805B, // 
0.717086834733894 i=101 0.5 / 0.5+101/512 (b=0x59400000) + 0x5B87DDAD, // 0.715083798882682 i=102 0.5 / 0.5+102/512 (b=0x59800000) + 0x5B46989A, // 0.713091922005571 i=103 0.5 / 0.5+103/512 (b=0x59C00000) + 0x5B05B05B, // 0.711111111111111 i=104 0.5 / 0.5+104/512 (b=0x5A000000) + 0x5AC5242A, // 0.709141274238227 i=105 0.5 / 0.5+105/512 (b=0x5A400000) + 0x5A84F345, // 0.707182320441989 i=106 0.5 / 0.5+106/512 (b=0x5A800000) + 0x5A451CEA, // 0.705234159779614 i=107 0.5 / 0.5+107/512 (b=0x5AC00000) + 0x5A05A05A, // 0.703296703296703 i=108 0.5 / 0.5+108/512 (b=0x5B000000) + 0x59C67CD8, // 0.701369863013699 i=109 0.5 / 0.5+109/512 (b=0x5B400000) + 0x5987B1A9, // 0.699453551912568 i=110 0.5 / 0.5+110/512 (b=0x5B800000) + 0x59493E14, // 0.697547683923706 i=111 0.5 / 0.5+111/512 (b=0x5BC00000) + 0x590B2164, // 0.695652173913043 i=112 0.5 / 0.5+112/512 (b=0x5C000000) + 0x58CD5AE2, // 0.693766937669377 i=113 0.5 / 0.5+113/512 (b=0x5C400000) + 0x588FE9DC, // 0.691891891891892 i=114 0.5 / 0.5+114/512 (b=0x5C800000) + 0x5852CDA0, // 0.690026954177898 i=115 0.5 / 0.5+115/512 (b=0x5CC00000) + 0x58160581, // 0.688172043010753 i=116 0.5 / 0.5+116/512 (b=0x5D000000) + 0x57D990D0, // 0.686327077747989 i=117 0.5 / 0.5+117/512 (b=0x5D400000) + 0x579D6EE3, // 0.684491978609626 i=118 0.5 / 0.5+118/512 (b=0x5D800000) + 0x57619F0F, // 0.682666666666667 i=119 0.5 / 0.5+119/512 (b=0x5DC00000) + 0x572620AE, // 0.680851063829787 i=120 0.5 / 0.5+120/512 (b=0x5E000000) + 0x56EAF319, // 0.679045092838196 i=121 0.5 / 0.5+121/512 (b=0x5E400000) + 0x56B015AC, // 0.677248677248677 i=122 0.5 / 0.5+122/512 (b=0x5E800000) + 0x567587C4, // 0.675461741424802 i=123 0.5 / 0.5+123/512 (b=0x5EC00000) + 0x563B48C2, // 0.673684210526316 i=124 0.5 / 0.5+124/512 (b=0x5F000000) + 0x56015805, // 0.671916010498688 i=125 0.5 / 0.5+125/512 (b=0x5F400000) + 0x55C7B4F1, // 0.670157068062827 i=126 0.5 / 0.5+126/512 (b=0x5F800000) + 0x558E5EE9, // 0.668407310704961 i=127 0.5 / 0.5+127/512 (b=0x5FC00000) + 0x55555555, // 
0.666666666666667 i=128 0.5 / 0.5+128/512 (b=0x60000000) + 0x551C979A, // 0.664935064935065 i=129 0.5 / 0.5+129/512 (b=0x60400000) + 0x54E42523, // 0.663212435233161 i=130 0.5 / 0.5+130/512 (b=0x60800000) + 0x54ABFD5A, // 0.661498708010336 i=131 0.5 / 0.5+131/512 (b=0x60C00000) + 0x54741FAB, // 0.659793814432990 i=132 0.5 / 0.5+132/512 (b=0x61000000) + 0x543C8B84, // 0.658097686375321 i=133 0.5 / 0.5+133/512 (b=0x61400000) + 0x54054054, // 0.656410256410256 i=134 0.5 / 0.5+134/512 (b=0x61800000) + 0x53CE3D8B, // 0.654731457800512 i=135 0.5 / 0.5+135/512 (b=0x61C00000) + 0x5397829C, // 0.653061224489796 i=136 0.5 / 0.5+136/512 (b=0x62000000) + 0x53610EFB, // 0.651399491094148 i=137 0.5 / 0.5+137/512 (b=0x62400000) + 0x532AE21C, // 0.649746192893401 i=138 0.5 / 0.5+138/512 (b=0x62800000) + 0x52F4FB76, // 0.648101265822785 i=139 0.5 / 0.5+139/512 (b=0x62C00000) + 0x52BF5A81, // 0.646464646464647 i=140 0.5 / 0.5+140/512 (b=0x63000000) + 0x5289FEB5, // 0.644836272040302 i=141 0.5 / 0.5+141/512 (b=0x63400000) + 0x5254E78E, // 0.643216080402010 i=142 0.5 / 0.5+142/512 (b=0x63800000) + 0x52201488, // 0.641604010025063 i=143 0.5 / 0.5+143/512 (b=0x63C00000) + 0x51EB851E, // 0.640000000000000 i=144 0.5 / 0.5+144/512 (b=0x64000000) + 0x51B738D1, // 0.638403990024938 i=145 0.5 / 0.5+145/512 (b=0x64400000) + 0x51832F1F, // 0.636815920398010 i=146 0.5 / 0.5+146/512 (b=0x64800000) + 0x514F678B, // 0.635235732009926 i=147 0.5 / 0.5+147/512 (b=0x64C00000) + 0x511BE195, // 0.633663366336634 i=148 0.5 / 0.5+148/512 (b=0x65000000) + 0x50E89CC2, // 0.632098765432099 i=149 0.5 / 0.5+149/512 (b=0x65400000) + 0x50B59897, // 0.630541871921182 i=150 0.5 / 0.5+150/512 (b=0x65800000) + 0x5082D499, // 0.628992628992629 i=151 0.5 / 0.5+151/512 (b=0x65C00000) + 0x50505050, // 0.627450980392157 i=152 0.5 / 0.5+152/512 (b=0x66000000) + 0x501E0B44, // 0.625916870415648 i=153 0.5 / 0.5+153/512 (b=0x66400000) + 0x4FEC04FE, // 0.624390243902439 i=154 0.5 / 0.5+154/512 (b=0x66800000) + 0x4FBA3D0A, // 
0.622871046228710 i=155 0.5 / 0.5+155/512 (b=0x66C00000) + 0x4F88B2F3, // 0.621359223300971 i=156 0.5 / 0.5+156/512 (b=0x67000000) + 0x4F576646, // 0.619854721549637 i=157 0.5 / 0.5+157/512 (b=0x67400000) + 0x4F265691, // 0.618357487922705 i=158 0.5 / 0.5+158/512 (b=0x67800000) + 0x4EF58364, // 0.616867469879518 i=159 0.5 / 0.5+159/512 (b=0x67C00000) + 0x4EC4EC4E, // 0.615384615384615 i=160 0.5 / 0.5+160/512 (b=0x68000000) + 0x4E9490E1, // 0.613908872901679 i=161 0.5 / 0.5+161/512 (b=0x68400000) + 0x4E6470B0, // 0.612440191387560 i=162 0.5 / 0.5+162/512 (b=0x68800000) + 0x4E348B4D, // 0.610978520286396 i=163 0.5 / 0.5+163/512 (b=0x68C00000) + 0x4E04E04E, // 0.609523809523810 i=164 0.5 / 0.5+164/512 (b=0x69000000) + 0x4DD56F47, // 0.608076009501188 i=165 0.5 / 0.5+165/512 (b=0x69400000) + 0x4DA637CF, // 0.606635071090047 i=166 0.5 / 0.5+166/512 (b=0x69800000) + 0x4D77397E, // 0.605200945626478 i=167 0.5 / 0.5+167/512 (b=0x69C00000) + 0x4D4873EC, // 0.603773584905660 i=168 0.5 / 0.5+168/512 (b=0x6A000000) + 0x4D19E6B3, // 0.602352941176471 i=169 0.5 / 0.5+169/512 (b=0x6A400000) + 0x4CEB916D, // 0.600938967136150 i=170 0.5 / 0.5+170/512 (b=0x6A800000) + 0x4CBD73B5, // 0.599531615925059 i=171 0.5 / 0.5+171/512 (b=0x6AC00000) + 0x4C8F8D28, // 0.598130841121495 i=172 0.5 / 0.5+172/512 (b=0x6B000000) + 0x4C61DD63, // 0.596736596736597 i=173 0.5 / 0.5+173/512 (b=0x6B400000) + 0x4C346404, // 0.595348837209302 i=174 0.5 / 0.5+174/512 (b=0x6B800000) + 0x4C0720AB, // 0.593967517401392 i=175 0.5 / 0.5+175/512 (b=0x6BC00000) + 0x4BDA12F6, // 0.592592592592593 i=176 0.5 / 0.5+176/512 (b=0x6C000000) + 0x4BAD3A87, // 0.591224018475751 i=177 0.5 / 0.5+177/512 (b=0x6C400000) + 0x4B809701, // 0.589861751152074 i=178 0.5 / 0.5+178/512 (b=0x6C800000) + 0x4B542804, // 0.588505747126437 i=179 0.5 / 0.5+179/512 (b=0x6CC00000) + 0x4B27ED36, // 0.587155963302752 i=180 0.5 / 0.5+180/512 (b=0x6D000000) + 0x4AFBE639, // 0.585812356979405 i=181 0.5 / 0.5+181/512 (b=0x6D400000) + 0x4AD012B4, // 
0.584474885844749 i=182 0.5 / 0.5+182/512 (b=0x6D800000) + 0x4AA4724B, // 0.583143507972665 i=183 0.5 / 0.5+183/512 (b=0x6DC00000) + 0x4A7904A7, // 0.581818181818182 i=184 0.5 / 0.5+184/512 (b=0x6E000000) + 0x4A4DC96E, // 0.580498866213152 i=185 0.5 / 0.5+185/512 (b=0x6E400000) + 0x4A22C04A, // 0.579185520361991 i=186 0.5 / 0.5+186/512 (b=0x6E800000) + 0x49F7E8E2, // 0.577878103837472 i=187 0.5 / 0.5+187/512 (b=0x6EC00000) + 0x49CD42E2, // 0.576576576576577 i=188 0.5 / 0.5+188/512 (b=0x6F000000) + 0x49A2CDF3, // 0.575280898876405 i=189 0.5 / 0.5+189/512 (b=0x6F400000) + 0x497889C2, // 0.573991031390135 i=190 0.5 / 0.5+190/512 (b=0x6F800000) + 0x494E75FA, // 0.572706935123042 i=191 0.5 / 0.5+191/512 (b=0x6FC00000) + 0x49249249, // 0.571428571428571 i=192 0.5 / 0.5+192/512 (b=0x70000000) + 0x48FADE5C, // 0.570155902004454 i=193 0.5 / 0.5+193/512 (b=0x70400000) + 0x48D159E2, // 0.568888888888889 i=194 0.5 / 0.5+194/512 (b=0x70800000) + 0x48A8048A, // 0.567627494456763 i=195 0.5 / 0.5+195/512 (b=0x70C00000) + 0x487EDE04, // 0.566371681415929 i=196 0.5 / 0.5+196/512 (b=0x71000000) + 0x4855E601, // 0.565121412803532 i=197 0.5 / 0.5+197/512 (b=0x71400000) + 0x482D1C31, // 0.563876651982379 i=198 0.5 / 0.5+198/512 (b=0x71800000) + 0x48048048, // 0.562637362637363 i=199 0.5 / 0.5+199/512 (b=0x71C00000) + 0x47DC11F7, // 0.561403508771930 i=200 0.5 / 0.5+200/512 (b=0x72000000) + 0x47B3D0F1, // 0.560175054704595 i=201 0.5 / 0.5+201/512 (b=0x72400000) + 0x478BBCEC, // 0.558951965065502 i=202 0.5 / 0.5+202/512 (b=0x72800000) + 0x4763D59C, // 0.557734204793028 i=203 0.5 / 0.5+203/512 (b=0x72C00000) + 0x473C1AB6, // 0.556521739130435 i=204 0.5 / 0.5+204/512 (b=0x73000000) + 0x47148BF0, // 0.555314533622560 i=205 0.5 / 0.5+205/512 (b=0x73400000) + 0x46ED2901, // 0.554112554112554 i=206 0.5 / 0.5+206/512 (b=0x73800000) + 0x46C5F19F, // 0.552915766738661 i=207 0.5 / 0.5+207/512 (b=0x73C00000) + 0x469EE584, // 0.551724137931034 i=208 0.5 / 0.5+208/512 (b=0x74000000) + 0x46780467, // 
0.550537634408602 i=209 0.5 / 0.5+209/512 (b=0x74400000) + 0x46514E02, // 0.549356223175966 i=210 0.5 / 0.5+210/512 (b=0x74800000) + 0x462AC20E, // 0.548179871520343 i=211 0.5 / 0.5+211/512 (b=0x74C00000) + 0x46046046, // 0.547008547008547 i=212 0.5 / 0.5+212/512 (b=0x75000000) + 0x45DE2864, // 0.545842217484009 i=213 0.5 / 0.5+213/512 (b=0x75400000) + 0x45B81A25, // 0.544680851063830 i=214 0.5 / 0.5+214/512 (b=0x75800000) + 0x45923543, // 0.543524416135881 i=215 0.5 / 0.5+215/512 (b=0x75C00000) + 0x456C797D, // 0.542372881355932 i=216 0.5 / 0.5+216/512 (b=0x76000000) + 0x4546E68F, // 0.541226215644820 i=217 0.5 / 0.5+217/512 (b=0x76400000) + 0x45217C38, // 0.540084388185654 i=218 0.5 / 0.5+218/512 (b=0x76800000) + 0x44FC3A34, // 0.538947368421053 i=219 0.5 / 0.5+219/512 (b=0x76C00000) + 0x44D72044, // 0.537815126050420 i=220 0.5 / 0.5+220/512 (b=0x77000000) + 0x44B22E27, // 0.536687631027254 i=221 0.5 / 0.5+221/512 (b=0x77400000) + 0x448D639D, // 0.535564853556485 i=222 0.5 / 0.5+222/512 (b=0x77800000) + 0x4468C066, // 0.534446764091858 i=223 0.5 / 0.5+223/512 (b=0x77C00000) + 0x44444444, // 0.533333333333333 i=224 0.5 / 0.5+224/512 (b=0x78000000) + 0x441FEEF8, // 0.532224532224532 i=225 0.5 / 0.5+225/512 (b=0x78400000) + 0x43FBC043, // 0.531120331950207 i=226 0.5 / 0.5+226/512 (b=0x78800000) + 0x43D7B7EA, // 0.530020703933747 i=227 0.5 / 0.5+227/512 (b=0x78C00000) + 0x43B3D5AF, // 0.528925619834711 i=228 0.5 / 0.5+228/512 (b=0x79000000) + 0x43901956, // 0.527835051546392 i=229 0.5 / 0.5+229/512 (b=0x79400000) + 0x436C82A2, // 0.526748971193416 i=230 0.5 / 0.5+230/512 (b=0x79800000) + 0x43491158, // 0.525667351129363 i=231 0.5 / 0.5+231/512 (b=0x79C00000) + 0x4325C53E, // 0.524590163934426 i=232 0.5 / 0.5+232/512 (b=0x7A000000) + 0x43029E1A, // 0.523517382413088 i=233 0.5 / 0.5+233/512 (b=0x7A400000) + 0x42DF9BB0, // 0.522448979591837 i=234 0.5 / 0.5+234/512 (b=0x7A800000) + 0x42BCBDC8, // 0.521384928716904 i=235 0.5 / 0.5+235/512 (b=0x7AC00000) + 0x429A0429, // 
0.520325203252033 i=236 0.5 / 0.5+236/512 (b=0x7B000000) + 0x42776E9A, // 0.519269776876268 i=237 0.5 / 0.5+237/512 (b=0x7B400000) + 0x4254FCE4, // 0.518218623481781 i=238 0.5 / 0.5+238/512 (b=0x7B800000) + 0x4232AECD, // 0.517171717171717 i=239 0.5 / 0.5+239/512 (b=0x7BC00000) + 0x42108421, // 0.516129032258065 i=240 0.5 / 0.5+240/512 (b=0x7C000000) + 0x41EE7CA6, // 0.515090543259557 i=241 0.5 / 0.5+241/512 (b=0x7C400000) + 0x41CC9829, // 0.514056224899598 i=242 0.5 / 0.5+242/512 (b=0x7C800000) + 0x41AAD671, // 0.513026052104208 i=243 0.5 / 0.5+243/512 (b=0x7CC00000) + 0x4189374B, // 0.512000000000000 i=244 0.5 / 0.5+244/512 (b=0x7D000000) + 0x4167BA81, // 0.510978043912176 i=245 0.5 / 0.5+245/512 (b=0x7D400000) + 0x41465FDF, // 0.509960159362550 i=246 0.5 / 0.5+246/512 (b=0x7D800000) + 0x41252730, // 0.508946322067594 i=247 0.5 / 0.5+247/512 (b=0x7DC00000) + 0x41041041, // 0.507936507936508 i=248 0.5 / 0.5+248/512 (b=0x7E000000) + 0x40E31ADE, // 0.506930693069307 i=249 0.5 / 0.5+249/512 (b=0x7E400000) + 0x40C246D4, // 0.505928853754941 i=250 0.5 / 0.5+250/512 (b=0x7E800000) + 0x40A193F1, // 0.504930966469428 i=251 0.5 / 0.5+251/512 (b=0x7EC00000) + 0x40810204, // 0.503937007874016 i=252 0.5 / 0.5+252/512 (b=0x7F000000) + 0x406090D9, // 0.502946954813359 i=253 0.5 / 0.5+253/512 (b=0x7F400000) + 0x40404040, // 0.501960784313725 i=254 0.5 / 0.5+254/512 (b=0x7F800000) + 0x40201008 // 0.500978473581213 i=255 0.5 / 0.5+255/512 (b=0x7FC00000) +}; + + +/* + * Fractional multiplication of signed a and b, both in Q31. The result is doubled. + * Note: in this test, saturation is not needed. + * BASOP weights: 3 + */ + +static Word32 L_dmult( Word32 L_var1, Word32 L_var2 ) +{ + Word64 L64_var1 = W_mult0_32_32( L_var1, L_var2 ); + L64_var1 = W_shr( L64_var1, 30 ); + return W_extract_l( L64_var1 ); +} + +/* + * 32 by 32 bit division, following the Newton / Raphson method. + * Usage of this low-level procedure by the caller: + * 1. 
Numerator can use the full range of signed 32-bit datatypes: 0x80000000...0x7FFFFFFF + * Note: Since num is not normalized here but is multiplied by the reciprocal of the + * denominator, the caller should use a normalized num and handle any exponent outside. + * 2. Denominator must be normalized into range 0x40000001 to 0x7FFFFFFF (all positive) + * Note: In case of den=0x40000000, the caller is not allowed to call the division routine, + since the result is known. + * Note: num / 0x40000000 equals num with exp += 1. + * 3. The result is the reciprocal (in range 0x40000000 to 0x7FFFFFFF), finally multiplied by num. + * BASOP weights: 24 (incl. L_dmult) + */ + +Word32 div_w_newton( Word32 num, Word32 den ) +{ + Word32 x0, x1, x2, x3, diff, result; + + x0 = division_lookup[sub( extract_l( L_shr( den, 22 ) ), 256 )]; + move32(); + + diff = L_sub( 0x40000000, Mpy_32_32( den, x0 ) ); + + x1 = L_add( x0, L_dmult( x0, diff ) ); + diff = L_sub( 0x40000000, Mpy_32_32( den, x1 ) ); + + x2 = L_add( x1, L_dmult( x1, diff ) ); + diff = L_sub( 0x40000000, Mpy_32_32( den, x2 ) ); + + x3 = L_add( x2, L_dmult( x2, diff ) ); + + result = Mpy_32_32( num, x3 ); + + return result; +} + +/* + * 32 / 32 division + * Usage of this global procedure by the caller: + * 1. Numerator can use the full range of signed 32-bit datatypes: 0x80000000...0x7FFFFFFF + * Note: The numerator is normalized internally (unlike in div_w_newton), so the + * caller need not pre-normalize it; the resulting exponent is returned via s. + * 2. Denominator can use the full range of signed 32-bit datatypes: 0x80000000...0x7FFFFFFF + * except den=0x00000000. In case of 0x80000000, it becomes internally negated to 0x7FFFFFFF. + * 3. The result is 0x00000000 (*s=0) if the numerator equals 0x00000000. + * The result is rather left aligned, with up to 1 bit headroom. + * 4. The result exponent is stored in s[0] + * BASOP weights: 41 (incl.
div_w_newton) + */ + +Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s ) +{ + Word32 z; + Word16 sx; + Word16 sy; + Word32 sign; + + assert( y != (Word32) 0 ); + + /* Early exit, if numerator is zero */ + IF( x == (Word32) 0 ) + { + *s = 0; + return ( (Word32) 0 ); + } + + sign = L_xor( x, y ); /* check (sign < 0) for result negation */ + + if ( x < 0 ) + { + x = L_negate( x ); + } + + if ( y < 0 ) + { + y = L_negate( y ); + } + + /* Normalize numerator */ + sx = norm_l( x ); + x = L_shl( x, sx ); + + /* Normalize denominator */ + sy = norm_l( y ); + y = L_shl( y, sy ); + + /* Store exponent: + 1 for div_w_newton computing 0.5*num/den */ + *s = sub( add( sy, 1 ), sx ); + move16(); + + /* Special treatment for den=0x40000000 */ + /* Result is known: z=2*num */ + IF( EQ_32( y, 0x40000000 ) ) + { + if ( sign < 0 ) + { + x = L_negate( x ); + } + return x; + } + + /* Invoke division applying Newton/Raphson-Algorithm */ + z = div_w_newton( x, y ); + + if ( sign < 0 ) + { + z = L_negate( z ); + } + + return z; +} +#endif /* DIV32_OPT_NEWTON */ + + Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) { Word32 z; +#ifdef DIV32_OPT_NEWTON + z = BASOP_Util_Divide3232_Scale_FhG( x, y, s ); + return z; +#else Word16 sx; Word16 sy; Word32 sign; @@ -1110,6 +1515,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) } return z; +#endif /* DIV32_OPT_NEWTON */ } Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s ) diff --git a/lib_com/options.h b/lib_com/options.h old mode 100644 new mode 100755 index c8110501c..9e777016a --- a/lib_com/options.h +++ b/lib_com/options.h @@ -71,6 +71,7 @@ #endif #define DIV32_OPT +#define DIV32_OPT_NEWTON /* FhG: Alternative div_w using Newton/Raphson method */ #define IVAS_FLOAT_FIXED #define IVAS_FLOAT_FIXED_CONVERSIONS /* Temporary macro to keep track of intermediate flt to fixed and fixed to flt conversions */ #define MSAN_FIX -- GitLab From 
4cfe9afdd88ec5efac46d5212e50aa069fb28df4 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 30 Jan 2025 17:57:30 +0100 Subject: [PATCH 5/7] added prototype for div_w_newton subroutine - avoid compiler warning only --- lib_com/basop_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index bb98db494..2fc761c03 100755 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -1062,8 +1062,8 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, /*!< i : Numerator*/ Word32 y, /*!< i : Denominator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ - -/* +Word32 div_w_newton( Word32 num, Word32 den ); + /* Table of 256 precalculated estimates to be used by the "div_w_newton" function using the Newton/Raphson method. Note: The first table value (for denominator near 0x40000000) is not fully -- GitLab From 4c322af6b63d70ac5c68f2244d4ec2b9ee18aedd Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 31 Jan 2025 09:45:14 +0100 Subject: [PATCH 6/7] deactivated temporarily DIV32_OPT_NEWTON to run pipeline w/o this code --- lib_com/basop_util.c | 28 ++++++++++++++-------------- lib_com/options.h | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) mode change 100755 => 100644 lib_com/basop_util.c diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c old mode 100755 new mode 100644 index 2fc761c03..e274f8e28 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -996,17 +996,17 @@ Word32 div_w( Word32 L_num, Word32 L_den, Word16 bits ) L_var_out = L_shl( L_var_out, 1 ); W_num = W_shl( W_num, 1 ); - if( GE_64( W_num, W_den ) ) + if ( GE_64( W_num, W_den ) ) { W_num = W_sub( W_num, W_den ); L_var_out = L_add( L_var_out, 1 ); } } - return L_shl(L_var_out, 31-bits); + return L_shl( L_var_out, 31 - bits ); } } -#else /* DIV32_OPT */ +#else /* DIV32_OPT */ Word32 div_w( Word32 L_num, Word32 L_den ) { Word32 L_var_out = 0; @@ -1063,12 +1063,12 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, /*!< i : 
Numerator*/ Word16 *s ); /*!< o : Additional scalefactor difference*/ Word32 div_w_newton( Word32 num, Word32 den ); - /* - Table of 256 precalculated estimates to be used by the "div_w_newton" - function using the Newton/Raphson method. - Note: The first table value (for denominator near 0x40000000) is not fully - accurate and should not be used. - */ +/* +Table of 256 precalculated estimates to be used by the "div_w_newton" +function using the Newton/Raphson method. +Note: The first table value (for denominator near 0x40000000) is not fully +accurate and should not be used. +*/ Word32 division_lookup[256] = { /* Precalculated rounded results for 0x40000000 / b with b in [0x40000000 ... 0x7FFFFFFF] */ 0x7FFFFFFF, // 1.000000000000000 i=0 0.5 / 0.5+0/512 (b=0x40000000) @@ -1330,7 +1330,7 @@ Word32 division_lookup[256] = { }; -/* +/* * Fractional multiplication of signed a and b, both in Q31. The result is doubled. * Note: in this test, saturation is not needed. * BASOP weights: 3 @@ -1347,10 +1347,10 @@ static Word32 L_dmult( Word32 L_var1, Word32 L_var2 ) * 32 by 32 bit division, following the Newton / Raphson method. * Usage of this low-level procedure by the caller: * 1. Numerator can use the full range of signed 32-bit datatypes: 0x80000000...0x7FFFFFFF - * Note: Since is not normalized here, but it is multplied to the reciprocal of the + * Note: Since is not normalized here, but it is multplied to the reciprocal of the * denominator, the caller should use a normalized and handle any exponent outside. * 2. Denominator must be normalized into range 0x40000001 to 0x7FFFFFFF (all positive) - * Note: In case of den=0x40000000, the caller is not allowed to call the division routine, + * Note: In case of den=0x40000000, the caller is not allowed to call the division routine, since the result is known. * Note: num / 0x40000000 equals to num with exp += 1. * 3. The result is in range 0x40000000 to 0x7FFFFFFF, finally multiplied by . 
@@ -1383,7 +1383,7 @@ Word32 div_w_newton( Word32 num, Word32 den ) * 32 / 32 division * Usage of this global procedure by the caller: * 1. Numerator can use the full range of signed 32-bit datatypes: 0x80000000...0x7FFFFFFF - * Note: Since is not normalized here, but it is multplied to the reciprocal of the + * Note: Since is not normalized here, but it is multplied to the reciprocal of the * denominator, the caller should use a normalized and handle any exponent outside. * 2. Denominator can use the full range of signed 32-bit datatypes: 0x80000000...0x7FFFFFFF * except den=0x00000000. In case of 0x80000000, it becomes internally negated to 0x7FFFFFFF. @@ -1505,7 +1505,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) #ifndef DIV32_OPT z = div_w( x, y ); -#else /* DIV32_OPT */ +#else /* DIV32_OPT */ z = div_w( x, y, 26 ); #endif /* DIV32_OPT */ diff --git a/lib_com/options.h b/lib_com/options.h index 9e777016a..0e28f0c65 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -71,7 +71,7 @@ #endif #define DIV32_OPT -#define DIV32_OPT_NEWTON /* FhG: Alternative div_w using Newton/Raphson method */ +/*#define DIV32_OPT_NEWTON*/ /* FhG: Alternative div_w using Newton/Raphson method */ #define IVAS_FLOAT_FIXED #define IVAS_FLOAT_FIXED_CONVERSIONS /* Temporary macro to keep track of intermediate flt to fixed and fixed to flt conversions */ #define MSAN_FIX -- GitLab From 4159703fe58400daa38e4c60db7eec4d89b6afe2 Mon Sep 17 00:00:00 2001 From: patilsatish Date: Mon, 24 Mar 2025 16:04:09 +0530 Subject: [PATCH 7/7] Update Cadence and FhG division kernel variants to cover corner cases --- lib_com/basop_util.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index e274f8e28..559e58008 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -979,8 +979,8 @@ Word32 div_w( Word32 L_num, Word32 L_den, Word16 bits ) return ( 0 ); } Word64 W_num, 
W_den; - W_num = W_deposit32_l( L_num ); - W_den = W_deposit32_l( L_den ); + W_num = W_deposit32_h( L_num ); + W_den = W_deposit32_h( L_den ); IF( GE_64( W_num, W_den ) ) { @@ -1409,13 +1409,27 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s ) return ( (Word32) 0 ); } +#if 0 sign = L_xor( x, y ); /* check (sign < 0) for result negation */ if ( x < 0 ) { x = L_negate( x ); } +#else + IF( EQ_32( y, 0x80000000 ) ) + { + /* Division by -1.0: same as negation of numerator */ + /* Return normalized negated numerator */ + sx = norm_l( x ); + x = L_shl( x, sx ); + *s = negate(sx); + return L_negate( x ); + } + sign = y; + move32(); +#endif if ( y < 0 ) { y = L_negate( y ); @@ -1473,7 +1487,12 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) sign = 0; move16(); - + Word16 exp_offset = 1; + IF (EQ_32(x, 0x80000000)) /* Handle corner case to avoid data loss */ + { + x = L_shr( x, 1 ); + exp_offset += 1; + } IF( x < 0 ) { x = L_negate( x ); @@ -1496,7 +1515,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s ) x = L_shl( x, sx ); x = L_shr( x, 1 ); move16(); - *s = sub( 1, sx ); + *s = sub( exp_offset, sx ); sy = norm_l( y ); y = L_shl( y, sy ); -- GitLab