## number-scan.pkg
#
# The string conversion for the largest int and word types.
# All of the other scan functions can be implemented in terms of them.
# Compiled by:
#
src/lib/std/src/standard-core.sublibstipulate
package i1w = one_word_int; # one_word_int is from
src/lib/std/types-only/basis-structs.pkg package u1w = one_word_unt; # one_word_unt is from
src/lib/std/types-only/basis-structs.pkg #
package it = inline_t; # inline_t is from
src/lib/core/init/built-in.pkg package ns = number_string; # number_string is from
src/lib/std/src/number-string.pkgherein
package number_scan
: (weak)
api {
scan_word
:
ns::Radix #
->
ns::Reader( Char, X )
->
ns::Reader( u1w::Unt, X );
scan_int
:
ns::Radix
->
ns::Reader( Char, X )
->
ns::Reader( i1w::Int, X );
scan_real
:
ns::Reader( Char, X)
->
ns::Reader( Float, X );
# * should be to eight_byte_float::Float *
}
{
package u32 = it::u1; # "u1" == "one-word unsigned int" (32-bits on 32-bit architectures; 64-bit on 64-bit architectures).
package ti = it::ti; # "ti" == "tagged_int".
package i32 = it::i1; # "i1" == "one-word signed int" (32-bits on 32-bit architectures; 64-bit on 64-bit architectures).
package r = it::f64; # "f64" == "64-bit float'.
Unt = u1w::Unt;
(<) = u32::(<);
(>=) = u32::(>=);
(+) = u32::(+);
(-) = u32::(-);
(*) = u32::(*);
largest_word_div10 = 0u429496729: Unt; # 2^32-1 divided by 10 # 64-bit issue
largest_word_mod10 = 0u5: Unt; # remainder # 64-bit issue
largest_neg_int1 = 0ux80000000: Unt; # 64-bit issue
largest_pos_int1 = 0ux7fffffff: Unt; # 64-bit issue
min_int1 = -2147483648: i1w::Int; # 64-bit issue
# A table for mapping digits to values. Whitespace characters map to
# 128, "+" maps to 129, "-", "~" map to 130, "." maps to 131, and the
# characters 0-9, A-Z, a-z map to their * base-36 value. All other
# characters map to 255.
stipulate
cvt_table = "\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\x80\x80\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\x80\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x81\xff\x82\x83\xff\
\\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\
\\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\
\\x19\x1a\x1b\x1c\x1d\x1e\x1f\xff\x21\x22\x23\xff\xff\xff\xff\xff\
\\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\
\\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\xff\xff\xff\x82\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\";
to_int = it::char::ord;
herein
fun code (c: Char)
=
u32::from_int (to_int (it::vector_of_chars::get_byte_as_char (cvt_table, to_int c)));
ws_code = 0u128: Unt; # Code for whitespace
plus_code = 0u129: Unt; # Code for '+'
minus_code = 0u130: Unt; # Code for '-' and '~'
pt_code = 0u131: Unt; # Code for '.'
e_code = 0u14: Unt; # Code for 'e' and 'E'
w_code = 0u32: Unt; # Code for 'w'
x_code = 0u33: Unt; # Code for 'X' and 'X'
end;
Prefix_Pat
=
{ w_okay: Bool, # TRUE if 0[wW] prefix is okay; if this is
# TRUE, then signs (+, -, ~) are not okay.
x_okay: Bool, # TRUE if 0[xX] prefix is okay
pt_okay: Bool, # TRUE if can start with point
is_digit: Unt -> Bool # returns TRUE for allowed digits
};
# scanPrefix: prefix_pat
# ->
# Reader( char, X )
# ->
# X
# ->
# Null_Or { neg: Bool, next: word /* code */, rest: X }
#
# scans prefix for a number:
# binPattern (TRUE) { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isBinDigit } =>
# (0[wW])?b (b binary digit)
# binPattern (FALSE) { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isBinDigit } =>
# [-~+]?b
# octPattern (TRUE) { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isOctDigit } =>
# (0[wW])?o (o octal digit)
# octPattern (FALSE) { wOkay=FALSE, xOkay=FALSE, ptOkay=FALSE, isOctDigit } =>
# [-~+]?o
# hexPattern (TRUE) { wOkay=TRUE, xOkay=TRUE, ptOkay=FALSE, is_hex_digit } =>
# (0[wW][xX])?h (h hex digit)
# hexPattern (FALSE) { wOkay=FALSE, xOkay=TRUE, ptOkay=FALSE, is_hex_digit } =>
# [-~+]?(0[xX])?h
# decPattern (TRUE, FALSE) { wOkay=TRUE, xOkay=FALSE, ptOkay=FALSE, isDecDigit } =>
# (0[wW][xX])?d (d decimal digit)
# decPattern (FALSE, false) { wOkay=FALSE, xOkay=FALSE, ptOkay=FALSE, isDecDigit } =>
# [-~+]?d
# decPattern (FALSE, TRUE) { wOkay=FALSE, xOkay=FALSE, ptOkay=TRUE, isDecDigit } =>
# [-~+]?[.d]
#
# Sign characters, initial 0x, 0u, etc are consumed. The initial
# digit or point code is returned as the value of next.
fun scan_prefix (p: Prefix_Pat) getc cs
=
get_opt_sign (skip_ws cs)
where
fun get_next cs
=
case (getc cs)
#
THE (c, cs) => THE (code c, cs);
NULL => NULL;
esac;
fun skip_ws cs
=
case (get_next cs)
#
THE (c, cs')
=>
if (c == ws_code) skip_ws cs';
else THE (c, cs');
fi;
NULL => NULL;
esac;
fun get_opt_sign (next as THE (c, cs))
=>
if (p.w_okay) get_opt0 (FALSE, THE (c, cs));
elif (c == plus_code) get_opt0 (FALSE, get_next cs);
elif (c == minus_code) get_opt0 (TRUE, get_next cs);
else get_opt0 (FALSE, next);
fi;
get_opt_sign NULL => NULL;
end
also
fun get_opt0 (neg, THE (c, cs))
=>
if (c == 0u0
and (p.w_okay or p.x_okay)
)
get_opt_w (neg, (c, cs), get_next cs);
else
finish (neg, (c, cs));
fi;
get_opt0 (neg, NULL) => NULL;
end
also
fun get_opt_w (neg, saved_cs, arg as THE (c, cs))
=>
if (c == w_code and p.w_okay) get_opt_x (neg, saved_cs, get_next cs);
else get_opt_x (neg, saved_cs, arg);
fi;
get_opt_w (neg, saved_cs, NULL)
=>
finish (neg, saved_cs);
end
also
fun get_opt_x (neg, saved_cs, NULL)
=>
finish (neg, saved_cs);
get_opt_x (neg, saved_cs, arg as THE (c, cs))
=>
if (c == x_code and p.x_okay) check_digit (neg, saved_cs, get_next cs);
else check_digit (neg, saved_cs, arg);
fi;
end
also
fun check_digit (neg, saved_cs, THE (c, cs))
=>
if (p.is_digit c) THE { neg, next => c, rest => cs };
else finish (neg, saved_cs);
fi;
check_digit (neg, saved_cs, NULL)
=>
finish (neg, saved_cs);
end
also
fun finish (neg, (c, cs))
=
if ((p.is_digit c) or ((c == pt_code) and p.pt_okay))
#
THE { neg, next => c, rest => cs };
else
NULL;
fi;
end; # fun scan_prefix
# For power of 2 bases (2, 8 & 16),
# we can check for overflow by looking
# at the hi (1, 3 or 4) bits.
#
fun check_overflow mask w
=
if (u32::bitwise_and (mask, w) != 0u0) raise exception OVERFLOW; fi;
fun is_bin_digit d = (d < 0u2);
fun is_oct_digit d = (d < 0u8);
fun is_dec_digit d = (d < 0u10);
fun is_hex_digit d = (d < 0u16);
fun bin_pattern w_okay = { w_okay, x_okay=>FALSE, pt_okay=>FALSE, is_digit=>is_bin_digit };
fun oct_pattern w_okay = { w_okay, x_okay=>FALSE, pt_okay=>FALSE, is_digit=>is_oct_digit };
fun hex_pattern w_okay = { w_okay, x_okay=>TRUE, pt_okay=>FALSE, is_digit=>is_hex_digit };
fun dec_pattern (w_okay, pt_okay)
=
{ w_okay, x_okay=>FALSE, pt_okay,
is_digit=>is_dec_digit };
fun scan_bin is_word getc cs
=
case (scan_prefix (bin_pattern is_word) getc cs)
#
THE { neg, next, rest }
=>
convert (next, rest)
where
check_overflow
=
check_overflow 0ux80000000;
fun convert (w, rest)
=
case (getc rest)
#
THE (c, rest')
=>
{ d = code c;
#
if (is_bin_digit d)
#
check_overflow w;
convert (u32::(+) (u32::lshift (w, 0u1), d), rest');
else
THE { neg, word=>w, rest };
fi;
};
NULL => THE { neg, word=>w, rest };
esac;
end;
NULL => NULL;
esac;
fun scan_oct is_word getc cs
=
case (scan_prefix (oct_pattern is_word) getc cs)
#
THE { neg, next, rest }
=>
convert (next, rest)
where
check_overflow
=
check_overflow 0uxE0000000;
fun convert (w, rest)
=
case (getc rest)
#
THE (c, rest')
=>
{ d = code c;
#
if (is_oct_digit d)
#
check_overflow w;
convert (u32::(+) (u32::lshift (w, 0u3), d), rest');
else
THE { neg, word=>w, rest };
fi;
};
NULL => THE { neg, word=>w, rest };
esac;
end;
NULL => NULL;
esac;
fun scan_dec is_word getc cs
=
case (scan_prefix (dec_pattern (is_word, FALSE)) getc cs)
#
THE { neg, next, rest }
=>
convert (next, rest)
where
fun convert (w, rest)
=
case (getc rest)
#
THE (c, rest')
=>
{ d = code c;
#
if (is_dec_digit d)
#
if ((w >= largest_word_div10)
and ((largest_word_div10 < w)
or (largest_word_mod10 < d))
)
raise exception OVERFLOW;
fi;
convert (0u10*w+d, rest');
else
THE { neg, word=>w, rest };
fi;
};
NULL => THE { neg, word=>w, rest };
esac;
end;
NULL => NULL;
esac;
fun scan_hex is_word getc cs
=
case (scan_prefix (hex_pattern is_word) getc cs)
#
THE { neg, next, rest }
=>
convert (next, rest)
where
check_overflow
=
check_overflow 0uxF0000000;
fun convert (w, rest)
=
case (getc rest)
#
THE (c, rest')
=>
{ d = code c;
#
if (is_hex_digit d)
#
check_overflow w;
convert (u32::(+) (u32::lshift (w, 0u4), d), rest');
else
THE { neg, word=>w, rest };
fi;
};
NULL => THE { neg, word=>w, rest };
esac;
end;
NULL => NULL;
esac;
fun final_word scan_g getc cs
=
case (scan_g TRUE getc cs)
#
THE { neg, word, rest }
=>
THE (word, rest);
NULL => NULL;
esac;
fun scan_word ns::BINARY => final_word scan_bin;
scan_word ns::OCTAL => final_word scan_oct;
scan_word ns::DECIMAL => final_word scan_dec;
scan_word ns::HEX => final_word scan_hex;
end;
stipulate
fromword32 = i32::from_large o u32::to_large_int_x;
herein
fun final_int scan_g getc cs
=
case (scan_g FALSE getc cs)
#
THE { neg=>TRUE, word, rest }
=>
if (word < largest_neg_int1)
#
THE (it::i1::neg(fromword32 word), rest);
else
if (largest_neg_int1 < word)
#
raise exception OVERFLOW;
else
THE (min_int1, rest);
fi;
fi;
THE { word, rest, ... }
=>
if (largest_pos_int1 < word)
#
raise exception OVERFLOW;
else
THE (fromword32 word, rest);
fi;
NULL => NULL;
esac;
end;
fun scan_int ns::BINARY => final_int scan_bin;
scan_int ns::OCTAL => final_int scan_oct;
scan_int ns::DECIMAL => final_int scan_dec;
scan_int ns::HEX => final_int scan_hex;
end;
# Scan a string of decimal digits (starting with d), and return their
# value as a real number. Also return the number of digits, and the
# rest of the stream.
#
fun fscan10 getc (d, cs)
=
{ fun word_to_real w
=
it::f64::from_tagged_int (u32::to_int_x w);
fun scan (accum, n, cs)
=
case (getc cs)
#
THE (c, cs')
=>
{ d = code c;
#
if (is_dec_digit d)
#
scan (r::(+) (r::(*) (10.0, accum), word_to_real d), ti::(+) (n, 1), cs');
else
THE (accum, n, cs);
fi;
};
NULL => THE (accum, n, cs);
esac;
if (is_dec_digit d) scan (word_to_real d, 1, cs);
else NULL;
fi;
};
stipulate
neg_table = #[
1.0E-0, 1.0E-1, 1.0E-2, 1.0E-3, 1.0E-4,
1.0E-5, 1.0E-6, 1.0E-7, 1.0E-8, 1.0E-9
];
pos_table = #[
1.0E0, 1.0E1, 1.0E2, 1.0E3, 1.0E4,
1.0E5, 1.0E6, 1.0E7, 1.0E8, 1.0E9
];
fun scale (table, step10: Float)
=
f
where
fun f (r, 0) => r;
#
f (r, exp)
=>
if (ti::(<) (exp, 10))
#
(r::(*) (r, it::poly_vector::get (table, exp)));
else
f (r::(*) (step10, r), ti::(-) (exp, 10));
fi;
end;
end;
herein
scale_up = scale (pos_table, 1.0E10);
scale_down = scale (neg_table, 1.0E-10);
end;
fun scan_real getc cs
=
{ fun scan10 cs
=
case (getc cs)
#
THE (c, cs) => fscan10 getc (code c, cs);
NULL => NULL;
esac;
fun get_frac rest
=
case (scan10 rest)
#
THE (frac, n, rest)
=>
THE (scale_down (frac, n), rest);
NULL => NULL;
esac;
fun negate (TRUE, num) => r::neg num;
negate (FALSE, num) => num;
end;
fun scan_expression cs
=
case (getc cs)
#
THE (c, cs)
=>
{ d = code c;
#
fun scan (accum, cs)
=
case (getc cs)
#
THE (c, cs')
=>
{ d = code c;
#
if (is_dec_digit d) scan (ti::(+) (ti::(*) (accum, 10), u32::to_int_x d), cs');
else (accum, cs);
fi;
};
NULL => (accum, cs);
esac;
if (is_dec_digit d) THE (scan (u32::to_int_x d, cs));
else NULL;
fi;
};
NULL => NULL;
esac;
fun get_expression (num, cs)
=
case (getc cs)
#
THE (c, cs1)
=>
if (code c == e_code)
#
case (getc cs1)
#
THE (c, cs2)
=>
{ code_c = code c;
#
my (is_neg, cs3)
=
if (code_c == minus_code)
#
(TRUE, cs2);
else
code_c == plus_code
?? (FALSE, cs2)
:: (FALSE, cs1);
fi; # no sign
case (scan_expression cs3)
#
THE (exp, cs4)
=>
THE ( is_neg ?? scale_down (num, exp)
:: scale_up (num, exp),
cs4
);
NULL => THE (num, cs);
esac;
};
NULL => THE (num, cs);
esac;
else
THE (num, cs);
fi;
NULL => THE (num, cs);
esac;
case (scan_prefix (dec_pattern (FALSE, TRUE)) getc cs)
#
THE { neg, next, rest }
=>
if (next == pt_code) # initial point after prefix
#
case (get_frac rest)
#
THE (frac, rest)
=>
get_expression (negate (neg, frac), rest);
NULL => NULL;
esac; # initial point not followed by digit
else
# ASSERT: next must be a digit
# get whole number part
case (fscan10 getc (next, rest))
#
THE (whole, _, rest)
=>
case (getc rest)
#
THE ('.', rest')
=>
# Whole part followed by point,
# get fraction:
#
case (get_frac rest')
#
THE (frac, rest'')
=>
# Fraction exists:
#
get_expression (negate (neg, r::(+) (whole, frac)), rest'');
NULL => THE (negate (neg, whole), rest); # No fraction -- point terminates num.
esac;
_ => get_expression (negate (neg, whole), rest);
esac;
NULL => NULL; # ASSERT: this case can't happen
esac;
fi;
NULL => NULL;
esac;
}; # fun scan_real
};
end;