## string-chartype.pkg
#
# Predicates on characters. This is modelled after the Unix C libraries.
# Each predicate comes in two forms; one that works on integers, and one
# that works on an arbitrary character in a string. The meanings of these
# predicates are documented in Section 3 of the Unix manual.
# Compiled by:
#     src/lib/std/src/standard-core.sublib
#
# See also:
#     src/lib/std/src/char.pkg
#     src/lib/std/src/int-chartype.pkg

package string_chartype
: String_Chartype                           # String_Chartype is from   src/lib/std/src/string-chartype.api
{
    # Char <-> Int conversions, both implemented with inline_t::cast.
    #
    my itoc: Int -> Char = inline_t::cast;
    my ctoi: Char -> Int = inline_t::cast;

    # For each character code we have an 8-bit vector, which is interpreted
    # as follows:
    #   0x01 == set for upper-case letters
    #   0x02 == set for lower-case letters
    #   0x04 == set for digits
    #   0x08 == set for white space characters
    #   0x10 == set for punctuation characters
    #   0x20 == set for control characters
    #   0x40 == set for hexadecimal characters  (0-9, A-F, a-f)
    #   0x80 == set for SPACE                   (only code 0x20 carries this bit)
    #
    # The table has 16 rows of 16 entries, one entry per character code
    # 0x00..0xFF.  Codes 0x80..0xFF have no bits set, so every table-driven
    # predicate below is false for them.
    #
    ctype_table = "\
        \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x28\x28\x28\x28\x28\x20\x20\
        \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
        \\x88\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\
        \\x44\x44\x44\x44\x44\x44\x44\x44\x44\x44\x10\x10\x10\x10\x10\x10\
        \\x10\x41\x41\x41\x41\x41\x41\x01\x01\x01\x01\x01\x01\x01\x01\x01\
        \\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x10\x10\x10\x10\x10\
        \\x10\x42\x42\x42\x42\x42\x42\x02\x02\x02\x02\x02\x02\x02\x02\x02\
        \\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x10\x10\x10\x10\x20\
        \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
        \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
        \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
        \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
        \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
        \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
        \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
        \\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
        \";

    # XXX BUGGO FIXME This table is duplicated from char.pkg, should share it.

    # Return TRUE iff the ctype_table entry for character code 'c'
    # has at least one of the bits in mask 's' set.
    #
    # The table byte is converted to Int with 'ctoi', the Char -> Int cast
    # declared at the top of this package and used by every predicate below.
    # (The earlier code called 'to_int' here, a name this package never
    # declares.)
    #
    # NOTE(review): no range check on 'c' is visible here; ctype_table has
    # 256 entries, so 'c' is presumably expected to lie in 0..255 -- confirm.
    #
    fun in_set (c, s)
        =
        {   m = ctoi (inline_t::vector_of_chars::get_byte_as_char (ctype_table, c));
            #
            (inline_t::default_int::bitwise_and (m, s) != 0);
        };

    # fun in_set (c, s)
    #     =
    #     bits::bitwise_and (ro_int8_vec_get (ctype_table, c), s) != 0;

    # Fetch the character at a given index of a string.
    #
    # NOTE(review): judging by the name this fetch is not bounds-checked, so
    # the predicates below presumably require 0 <= i < length of s -- confirm.
    #
    unsafe_get = inline_t::vector_of_chars::get_byte_as_char;

    # Predicates on indexed strings
    #
    # Each takes (s, i), fetches the character at index i of string s and
    # tests its ctype_table entry against a mask built from the bits
    # documented above; a predicate holds when ANY bit of its mask is set:
    #   0x03 = upper | lower
    #   0x07 = upper | lower | digit
    #   0x17 = upper | lower | digit | punctuation
    #   0x97 = upper | lower | digit | punctuation | SPACE
    #
    fun is_alpha        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x03);
    fun is_upper        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x01);
    fun is_lower        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x02);
    fun is_digit        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x04);
    fun is_hex_digit    (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x40);
    fun is_alphanumeric (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x07);
    fun is_space        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x08);
    fun is_punct        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x10);
    fun is_graph        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x17);
    fun is_print        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x97);
    fun is_cntrl        (s, i) = in_set (ctoi (unsafe_get (s, i)), 0x20);

    # is_ascii does not consult the table: it holds when the character
    # code is below 128.
    #
    fun is_ascii        (s, i) = ctoi (unsafe_get (s, i)) < 128;
};                                          # package string_chartype