## char.pkg
# Compiled by:
#
src/lib/std/src/standard-core.sublib### "Almost everything that distinguishes the modern world
### from earlier centuries is attributable to science,
### which achieved its most spectacular triumphs
### in the seventeenth century."
###
### -- Bertrand Russell
stipulate
package ic = inline_t::char; # inline_t is from
src/lib/core/init/built-in.pkg package it = inline_t; # inline_t is from
src/lib/core/init/built-in.pkg package nf = number_format; # number_format is from
src/lib/std/src/number-format.pkg package ns = number_string; # number_string is from
src/lib/std/src/number-string.pkg package ps = protostring; # protostring is from
src/lib/std/src/protostring.pkg package rt = runtime; # runtime is from src/lib/core/init/built-in.pkg.
package g2d = exceptions_guts; # exceptions_guts is from
src/lib/std/src/exceptions-guts.pkgherein
package char: (weak)
api {
include api Char; # Char is from
src/lib/std/src/char.api scan_c: ns::Reader( Char, X ) -> ns::Reader( Char, X );
#
# Internal scanning function for C-style escape sequences
}
{
(+) = it::default_int::(+);
(-) = it::default_int::(-);
(*) = it::default_int::(*);
itoc = it::cast: Int -> Char;
ctoi = it::cast: Char -> Int;
Char = Char;
String = String;
min_char = ic::chr 0 : Char;
max_char = ic::chr ic::max_ord : Char;
max_ord = ic::max_ord;
fun prior (c: Char) : Char
=
{ c' = ctoi c - 1;
#
if (it::default_int::(<) (c', 0))
#
raise exception g2d::BAD_CHAR; # exceptions_guts is from
src/lib/std/src/exceptions-guts.pkg else
(itoc c');
fi;
};
fun next (c: Char) : Char
=
{ c' = ctoi c + 1;
#
if (it::default_int::(<) (max_ord, c'))
#
raise exception g2d::BAD_CHAR;
else
(itoc c');
fi;
};
from_int = ic::chr;
to_int = ic::ord;
(<) = ic::(<);
(<=) = ic::(<=);
(>) = ic::(>);
(>=) = ic::(>=);
fun compare (c1: Char, c2: Char)
=
if (c1 == c2) EQUAL;
elif (c1 < c2) LESS;
else GREATER;
fi;
# Testing character membership:
#
stipulate
#
fun make_array (s, s_len)
=
{ init 0;
ins 0;
cv;
}
where
cv = rt::asm::make_string (max_ord+1); # "rt" == "runtime" -- from
src/lib/core/init/built-in.pkg #
fun init i
=
if (it::default_int::(<=) (i, max_ord))
#
it::vector_of_chars::set_char_as_byte (cv, i, '\x00');
init (i+1);
fi;
fun ins i
=
if (it::default_int::(<) (i, s_len))
#
it::vector_of_chars::set_char_as_byte (
cv,
to_int (it::vector_of_chars::get_byte_as_char (s, i)),
'\x01'
);
ins (i+1);
fi;
end;
herein
fun contains "" => \\ c = FALSE;
#
contains s
=>
{ s_len = it::vector_of_chars::length s;
#
if (s_len == 1)
#
c' = it::vector_of_chars::get_byte_as_char (s, 0);
#
\\ c = (c == c');
else
cv = make_array (s, s_len);
#
\\ c = (it::vector_of_chars::get_byte_as_char (cv, to_int c) != '\x00');
fi;
};
end;
fun not_contains "" => \\ c = TRUE;
#
not_contains s
=>
{ s_len = it::vector_of_chars::length s;
#
if (s_len == 1)
#
c' = it::vector_of_chars::get_byte_as_char (s, 0);
#
\\ c = c != c';
else
cv = make_array (s, s_len);
#
\\ c = it::vector_of_chars::get_byte_as_char (cv, to_int c) == '\x00';
fi;
};
end;
end; # stipulate
# For each character code we have an 8-bit vector, which is interpreted
# as follows:
# 0x01 == set for upper-case letters
# 0x02 == set for lower-case letters
# 0x04 == set for digits
# 0x08 == set for white space characters
# 0x10 == set for punctuation characters
# 0x20 == set for control characters
# 0x40 == set for hexadecimal characters
# 0x80 == set for SPACE
ctype_table = "\
\\x20\x20\x20\x20\x20\x20\x20\x20\x20\x28\x28\x28\x28\x28\x20\x20\
\\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
\\x88\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\
\\x44\x44\x44\x44\x44\x44\x44\x44\x44\x44\x10\x10\x10\x10\x10\x10\
\\x10\x41\x41\x41\x41\x41\x41\x01\x01\x01\x01\x01\x01\x01\x01\x01\
\\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x10\x10\x10\x10\x10\
\\x10\x42\x42\x42\x42\x42\x42\x02\x02\x02\x02\x02\x02\x02\x02\x02\
\\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x10\x10\x10\x10\x20\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\";
fun in_set (c, s)
=
{ m = to_int (it::vector_of_chars::get_byte_as_char (ctype_table, to_int c));
#
(it::default_int::bitwise_and (m, s) != 0);
};
# Predicates on integer coding of Ascii values:
#
fun is_alpha c = in_set (c, 0x03);
fun is_upper c = in_set (c, 0x01);
fun is_lower c = in_set (c, 0x02);
fun is_digit c = in_set (c, 0x04);
fun is_hex_digit c = in_set (c, 0x40);
fun is_alphanumeric c = in_set (c, 0x07);
fun is_space c = in_set (c, 0x08);
fun is_punct c = in_set (c, 0x10);
fun is_graph c = in_set (c, 0x17);
fun is_print c = in_set (c, 0x97);
fun is_cntrl c = in_set (c, 0x20);
fun is_ascii c = it::default_int::(<) (to_int c, 128);
offset = ctoi 'a' - ctoi 'A';
fun to_upper c = if (is_lower c) itoc (ctoi c - offset); else c;fi;
fun to_lower c = if (is_upper c) itoc (ctoi c + offset); else c;fi;
fun scan_digits is_digit getc n stream
=
scan (stream, n, [])
where
fun scan (stream, 0, l)
=>
(list::reverse l, stream);
scan (stream, i, l)
=>
case (getc stream)
#
NULL => (list::reverse l, stream);
#
THE (c, stream')
=>
is_digit c
?? scan (stream', i - 1, c ! l)
:: (list::reverse l, stream);
esac;
end;
end;
fun check_digits radix (l, stream)
=
case ( number_scan::scan_int radix next l)
#
THE (i, _)
=>
it::i1::(<) (i, 256)
?? THE (from_int (it::i1::to_int i), stream)
:: NULL;
NULL => NULL;
esac
where
fun next (x ! r) => THE (x, r);
next [] => NULL;
end;
end;
# Conversions between characters
# and printable representations:
fun scan getc
=
scan'
where
fun scan' rep
=
{ fun get2 rep
=
case (getc rep)
#
THE (c1, rep')
=>
case (getc rep')
#
THE (c2, rep'') => THE (c1, c2, rep'');
_ => NULL;
esac;
_ => NULL;
esac;
case (getc rep)
#
NULL => NULL;
#
THE('\\', rep')
=>
case (getc rep')
#
NULL => NULL;
#
THE('\\',rep'') => THE('\\', rep'');
THE('"', rep'') => THE('"', rep'');
THE('a', rep'') => THE('\a', rep'');
THE('b', rep'') => THE('\b', rep'');
THE('t', rep'') => THE('\t', rep'');
THE('n', rep'') => THE('\n', rep'');
THE('v', rep'') => THE('\v', rep'');
THE('f', rep'') => THE('\f', rep'');
THE('r', rep'') => THE('\r', rep'');
THE('^', rep'')
=>
case (getc rep'')
#
THE (c, rep''')
=>
if (('@' <= c) and (c <= '_')) THE (from_int (to_int c - to_int '@'), rep''');
else NULL;
fi;
NULL => NULL;
esac;
THE (d1, rep'')
=>
if (is_digit d1)
#
case (get2 rep'')
#
THE (d2, d3, rep''')
=>
{ fun convert d
=
(to_int d - to_int '0');
if (is_digit d2 and is_digit d3)
#
n = 100*(convert d1) + 10*(convert d2) + (convert d3);
if (it::default_int::(<) (n, 256))
THE (from_int n, rep''');
else NULL;
fi;
else
NULL;
fi;
};
NULL => NULL;
esac;
elif (is_space d1)
# Skip over \<ws>+\
#
fun skip_ws stream
=
case (getc stream)
#
NULL => NULL;
THE('\\', stream')
=>
scan' stream';
THE (c, stream')
=>
if (is_space c) skip_ws stream';
else NULL;
fi;
esac;
skip_ws rep'';
else
NULL;
fi;
esac;
THE ('"', rep')
=>
NULL;
THE (c, rep')
=>
if (is_print c) THE (c, rep');
else NULL;
fi;
esac;
}; # fun scan'
end; # fun scan
from_string
=
ns::scan_string scan;
itoa =
(nf::format_int ns::DECIMAL)
o
it::i1::from_int;
fun to_string '\a' => "\\a";
to_string '\b' => "\\b";
to_string '\t' => "\\t";
to_string '\n' => "\\n";
to_string '\v' => "\\v";
to_string '\f' => "\\f";
to_string '\r' => "\\r";
to_string '"' => "\\\"";
to_string '\\' => "\\\\";
to_string c
=>
if (is_print c)
#
it::poly_vector::get (ps::chars, to_int c); # NOTE: we should probably recognize the control characters XXX SUCKO FIXME
else
c' = to_int c;
if (it::default_int::(>) (c', 32)) ps::meld2 ("\\", itoa c');
else ps::meld2 ("\\^", it::poly_vector::get (ps::chars, c'+64));
fi;
fi;
end;
# Scanning function for C escape sequences
fun scan_c getc
=
scan
where
fun is_oct_digit d
=
'0' <= d and
d <= '7';
fun scan stream
=
case (getc stream)
#
NULL => NULL;
THE ('\\', stream')
=>
case (getc stream')
#
NULL => NULL;
THE ('a', stream'') => THE ('\a', stream'');
THE ('b', stream'') => THE ('\b', stream'');
THE ('t', stream'') => THE ('\t', stream'');
THE ('n', stream'') => THE ('\n', stream'');
THE ('v', stream'') => THE ('\v', stream'');
THE ('f', stream'') => THE ('\f', stream'');
THE ('r', stream'') => THE ('\r', stream'');
THE ('\\', stream'') => THE ('\\', stream'');
THE ('"', stream'') => THE ('"', stream'');
THE ('\'', stream'') => THE ('\'', stream'');
THE ('?', stream'') => THE ('?', stream'');
THE ('x', stream'')
=>
# Hex escape code
#
check_digits ns::HEX
(scan_digits is_hex_digit getc -1 stream'');
_ =>
# Should be octal escape code
check_digits ns::OCTAL
(scan_digits is_oct_digit getc 3 stream');
esac;
# NOT SURE ABOUT THE FOLLOWING TWO CASES: XXX BUGGO FIXME
# THE('"', stream'') => NULL; # error --- not escaped
# THE('\'', stream'') => NULL; # error --- not escaped
THE (c, stream'')
=>
if (is_print c) THE (c, stream'');
else NULL;
fi;
esac;
end;
from_cstring
=
ns::scan_string scan_c;
fun to_cstring '\a' => "\\a";
to_cstring '\b' => "\\b";
to_cstring '\t' => "\\t";
to_cstring '\n' => "\\n";
to_cstring '\v' => "\\v";
to_cstring '\f' => "\\f";
to_cstring '\r' => "\\r";
to_cstring '"' => "\\\"";
to_cstring '\\' => "\\\\";
to_cstring '?' => "\\?";
to_cstring '\'' => "\\'";
to_cstring '\x00' => "\\0";
to_cstring c
=>
if (is_print c)
#
it::poly_vector::get (ps::chars, to_int c);
else
i = it::i1::from_int (to_int c);
#
prefix = if (it::i1::(<) (i, 8))
#
"\\00";
else
it::i1::(<) (i, 64)
?? "\\0"
:: "\\";
fi;
ps::meld2 (prefix, nf::format_int ns::OCTAL i);
fi;
end;
nul = '\x00';
ctrl_a = '\x01';
ctrl_b = '\x02';
ctrl_c = '\x03';
ctrl_d = '\x04';
ctrl_e = '\x05';
ctrl_f = '\x06';
ctrl_g = '\x07';
ctrl_h = '\x08';
ctrl_i = '\x09';
ctrl_j = '\x0A'; newline = '\x0A';
ctrl_k = '\x0B';
ctrl_l = '\x0C';
ctrl_m = '\x0D'; return = '\x0D';
ctrl_n = '\x0E';
ctrl_o = '\x0F';
ctrl_p = '\x10';
ctrl_q = '\x11';
ctrl_r = '\x12';
ctrl_s = '\x13';
ctrl_t = '\x14';
ctrl_u = '\x15';
ctrl_v = '\x16';
ctrl_w = '\x17';
ctrl_x = '\x18';
ctrl_y = '\x19';
ctrl_z = '\x1A';
del = '\xFF';
}; # package char
end;