## int-chartype.pkg
#
# Predicates on characters. This is modelled after the Unix C libraries.
# Each predicate comes in two forms; one that works on integers, and one
# that works on an arbitrary character in a string. The meanings of these
# predicates are documented in Section 3 of the Unix manual.
# Compiled by:
#     src/lib/std/src/standard-core.sublib
#
# See also:
#     src/lib/std/src/char.pkg
#     src/lib/std/src/string-chartype.pkg
#
# Int_Chartype is from   src/lib/std/src/int-chartype.api
#
package int_chartype: Int_Chartype {
# Raw conversions between character codes and characters, both bound to
# the compiler's cast primitive (inline_t::cast) at the stated types.
# No range check is visible here, so itoc presumably accepts any Int
# unchecked -- TODO confirm against inline_t.
# NOTE(review): neither binding is referenced in the rest of this package
# as visible here; in_set calls to_int rather than ctoi.
my itoc: Int -> Char = inline_t::cast;
my ctoi: Char -> Int = inline_t::cast;
# Character classification table, indexed by character code.
#
# For each character code we have an 8-bit vector, which is interpreted
# as follows:
# 0x01 == set for upper-case letters
# 0x02 == set for lower-case letters
# 0x04 == set for digits
# 0x08 == set for white space characters
# 0x10 == set for punctuation characters
# 0x20 == set for control characters
# 0x40 == set for hexadecimal characters
# 0x80 == set for SPACE
#
# Layout: 16 rows of 16 entries each, i.e. 256 entries for codes
# 0x00..0xFF.  The first 8 rows describe the ASCII range; the last 8 rows
# (codes 0x80..0xFF) are all zero, so no classification flag is set for
# any non-ASCII code.
#
# Each row ends with a backslash and the next row starts with one; this
# looks like the SML-style string continuation gap, so the actual entry
# data on each row starts at the first "\x" -- TODO confirm against the
# Mythryl lexer.  Do not insert comments or other text between the rows.
ctype_table = "\
\\x20\x20\x20\x20\x20\x20\x20\x20\x20\x28\x28\x28\x28\x28\x20\x20\
\\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
\\x88\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\
\\x44\x44\x44\x44\x44\x44\x44\x44\x44\x44\x10\x10\x10\x10\x10\x10\
\\x10\x41\x41\x41\x41\x41\x41\x01\x01\x01\x01\x01\x01\x01\x01\x01\
\\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x10\x10\x10\x10\x10\
\\x10\x42\x42\x42\x42\x42\x42\x02\x02\x02\x02\x02\x02\x02\x02\x02\
\\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x10\x10\x10\x10\x20\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\";
# XXX BUGGO FIXME This table is duplicated from char.pkg, should share it.
# Test whether the classification entry for character code c has at
# least one of the flag bits in mask s set.
#
#   c   Integer character code.  Only 0..255 index the 256-entry
#       ctype_table.
#   s   Bit mask built from the flag values listed above ctype_table
#       (0x01 upper-case, 0x02 lower-case, 0x04 digit, ...).
#
# Returns TRUE iff the table entry for c shares a bit with s.
# Any c outside 0..255 yields FALSE.
#
# The range is checked explicitly before the table is indexed.  The
# lookup goes through an inline_t primitive, and whether that primitive
# bounds-checks is not visible here; the callers' "except _ = FALSE"
# handlers only help if it does.  Guarding here makes out-of-range codes
# classify as FALSE without depending on that.
#
# NOTE(review): to_int is not defined in the visible part of this file;
# presumably it is a Char -> Int conversion from the enclosing
# environment (ctoi above has that type) -- TODO confirm.
fun in_set (c, s)
    =
    if (c < 0  or  c > 255)
        FALSE;
    else
        inline_t::default_int::bitwise_and (to_int (inline_t::vector_of_chars::get_byte_as_char (ctype_table, c)), s) != 0;
    fi;
# fun in_set (c, s)
# =
# (string::get_byte_as_char (ctype_table, c) & s) != 0;
# Predicates on integer coding of Ascii values
#
# Shared lookup for all the predicates below: TRUE iff character code i
# has any of the flag bits in mask set in the classification table.
# Any exception raised by the lookup (e.g. for a code the table does not
# cover) is mapped to FALSE, so the predicates never raise.
fun has_flags (i, mask) = in_set (i, mask) except _ = FALSE;
#
# Each predicate takes an integer character code and returns a Bool.
# The mask is the union of the table flags that satisfy the predicate.
#
fun is_alpha i = has_flags (i, 0x03);		# Upper- or lower-case letter.
fun is_upper i = has_flags (i, 0x01);		# Upper-case letter.
fun is_lower i = has_flags (i, 0x02);		# Lower-case letter.
fun is_digit i = has_flags (i, 0x04);		# Decimal digit.
fun is_hex_digit i = has_flags (i, 0x40);	# Hexadecimal digit.
fun is_alphanumeric i = has_flags (i, 0x07);	# Letter or decimal digit.
fun is_space i = has_flags (i, 0x08);		# White space character.
fun is_punct i = has_flags (i, 0x10);		# Punctuation character.
fun is_graph i = has_flags (i, 0x17);		# Letter, digit or punctuation.
fun is_print i = has_flags (i, 0x97);		# As is_graph, plus SPACE.
fun is_cntrl i = has_flags (i, 0x20);		# Control character.
#
# TRUE iff the integer code lies in the 7-bit ASCII range 0..127.
# Unlike the table-driven predicates this is a pure range test.
fun is_ascii code = code >= 0 and code <= 127;
# Conversion routines
#
# Reduce an integer code to 7 bits by masking with 0x7F, i.e. clear
# every bit above the low seven.
fun to_ascii code = 0x7F & code;
# Map a lower-case letter code to the corresponding upper-case code
# (32 below it); every other code is returned unchanged.
fun to_upper code
    =
    if (is_lower code)   code - 32;
    else                 code;
    fi;
# Map an upper-case letter code to the corresponding lower-case code
# (32 above it); every other code is returned unchanged.
fun to_lower code
    =
    if (is_upper code)   code + 32;
    else                 code;
    fi;
}; # package int_chartype