


## string-guts.pkg
# Compiled by:
# src/lib/std/src/standard-core.sublib### "Harp not on that string."
###
### -- William Shakespeare, "Henry VI"
stipulate
package chr = char; # char is from src/lib/std/src/char.pkg package it = inline_t; # inline_t is from src/lib/core/init/built-in.pkg package ps = protostring; # protostring is from src/lib/std/src/protostring.pkg package rt = runtime; # runtime is from src/lib/core/init/runtime.pkg package xg = exceptions_guts; # exceptions_guts is from src/lib/std/src/exceptions-guts.pkgherein
package string_guts
: (weak) String # String is from src/lib/std/src/string.api {
(+) = it::default_int::(+);
(-) = it::default_int::(-);
(<) = it::default_int::(<);
(<=) = it::default_int::(<=);
(>) = it::default_int::(>);
(>=) = it::default_int::(>=);
# (==) = it::(==);
unsafe_get = it::vector_of_chars::get;
unsafe_set = it::vector_of_chars::set;
# List reverse
#
fun reverse ([], l) => l;
reverse (x ! r, l) => reverse (r, x ! l);
end;
Char = Char;
String = String;
maximum_vector_length = core::maximum_vector_length;
# The length of a string:
#
length = it::vector_of_chars::length;
unsafe_create
=
rt::asm::make_string;
# Allocate an uninitialized string of given length
#
fun create n
=
if (it::default_int::ltu (maximum_vector_length, n))
#
raise exception xg::SIZE;
else
rt::asm::make_string n;
fi;
# Convert a character into a single character string
#
fun from_char (c: chr::Char) : String
=
it::poly_vector::get (ps::chars, it::cast c);
# Get a character from a string
#
get = it::vector_of_chars::check_sub: (String, Int) -> Char;
# The (_[]) enables 'vec[index]' notation;
#
my (_[]): (String, Int) -> Char
=
it::vector_of_chars::check_sub;
# Return the n-character substring of s starting at position i.
# NOTE: we use words to check the right bound so as to avoid
# raising overflow.
#
stipulate
package w = it::default_unt; # inline_t is from src/lib/core/init/built-in.pkg herein
fun substring (s, i, n)
=
if (((i < 0) or (n < 0)
or
w::(<) (w::from_int (size s), w::(+) (w::from_int i, w::from_int n)))
)
raise exception xg::SUBSCRIPT; # exceptions_guts is from src/lib/std/src/exceptions-guts.pkg else
ps::unsafe_substring (s, i, n);
fi;
end;
fun extract (v, base, opt_len)
=
{ len = size v;
#
fun new_vec n
=
{ new_v = rt::asm::make_string n;
#
fun fill i
=
if (i < n)
#
unsafe_set (new_v, i, unsafe_get (v, base+i));
fill (i+1);
fi;
fill 0;
new_v;
};
case (base, opt_len)
#
(0, NULL) => v;
(_, THE 0)
=>
if (base < 0 or len < base)
#
raise exception xg::SUBSCRIPT;
else "";
fi;
(_, NULL)
=>
{ if (base < 0 or len < base) raise exception xg::SUBSCRIPT; fi;
#
if (base == len) "";
else new_vec (len - base);
fi;
};
(_, THE 1)
=>
{ if (base < 0 or len < base+1) raise exception xg::SUBSCRIPT; fi;
#
str (unsafe_get (v, base));
};
(_, THE n)
=>
{ if (base < 0 or n < 0 or len < base+n) raise exception xg::SUBSCRIPT; fi;
#
new_vec n;
};
esac;
};
# Concatenate a list of strings:
#
fun cat [ string ]
=>
string;
cat (sl: List( String ))
=>
{ fun length (i, [])
=>
i;
length (i, s ! rest)
=>
length (i+size s, rest);
end;
case (length (0, sl))
#
0 => "";
1 => find sl
where
fun find ("" ! r) => find r;
find ( s ! _) => s;
find _ => ""; # Impossible.
end;
end;
tot_len
=>
{ ss = create tot_len;
fun copy ([], _)
=>
();
copy (s ! r, i)
=>
{ len = size s;
fun copy' j
=
if (j != len)
#
unsafe_set (ss, i+j, unsafe_get (s, j));
copy'(j+1);
fi;
copy' 0;
copy (r, i+len);
};
end;
copy (sl, 0);
ss;
};
esac;
};
end; # cat
# Concatenate a list of strings using the
# given separator string, so
# join " " ["an", "example"]
# ->
# "an example"
#
fun join _ [] => "";
join _ [x] => x;
join sep (h ! t)
=>
cat (
reverse (
fold_forward
(fn (x, l) = x ! sep ! l)
[h]
t,
[]
)
);
end;
# As above, with null delimiters:
fun implode [] => ""; # Implode a list of characters into a string.
#
implode cl
=>
{ fun length ([], n) => n;
length (_ ! r, n) => length (r, n+1);
end;
ps::implode (length (cl, 0), cl);
};
end;
fun explode s # Explode a string into a list of characters.
=
f (NIL, size s - 1)
where
fun f (l, -1) => l;
f (l, i) => f (unsafe_get (s, i) ! l, i - 1);
end;
end;
fun map f vec
=
case (size vec)
#
0 => "";
#
len => { new_vec = rt::asm::make_string len;
mapf 0
where
fun mapf i
=
if (i < len)
#
unsafe_set (new_vec, i, f (unsafe_get (vec, i)));
mapf (i+1);
fi;
end;
new_vec;
};
esac;
# Map a translation function across the characters of a string
#
fun translate tr s
=
ps::translate (tr, s, 0, size s);
fun tokens is_delimiter s # Tokenize a string using the given predicate
= # to define the delimiter characters.
reverse (scan_token (0, 0, []), [])
where
n = size s;
fun substr (i, j, l)
=
if (i == j) l;
else ps::unsafe_substring (s, i, j-i) ! l;
fi;
fun scan_token (i, j, toks)
=
if (j < n)
#
if (is_delimiter (unsafe_get (s, j))) skip_delimiters (j+1, substr (i, j, toks));
else scan_token (i, j+1, toks);
fi;
else
substr (i, j, toks);
fi
also
fun skip_delimiters (j, toks)
=
if (j < n)
#
if (is_delimiter (unsafe_get (s, j))) skip_delimiters (j+1, toks);
else scan_token (j, j+1, toks);
fi;
else
toks;
fi;
end;
fun fields is_delimiter s
=
{ n = size s;
reverse (scan_token (0, 0, []), [])
where
fun scan_token (i, j, toks)
=
if (j < n)
#
if (is_delimiter (unsafe_get (s, j))) scan_token (j+1, j+1, substr (i, j, toks));
else scan_token (i, j+1, toks);
fi;
else
substr (i, j, toks);
fi
where
fun substr (i, j, l)
=
ps::unsafe_substring (s, i, j-i) ! l;
end;
end;
};
# String comparisons
#
fun is_prefix s1 s2
=
ps::is_prefix (s1, s2, 0, size s2);
fun is_suffix s1 s2
=
{ sz2 = size s2;
#
ps::is_prefix (s1, s2, sz2 - size s1, sz2);
};
fun is_substring s
=
{ stringsearch = ps::kmp s;
#
fun search s'
=
{ epos = size s';
stringsearch (s', 0, epos) < epos;
};
search;
};
fun compare (a, b)
=
ps::compare (a, 0, size a, b, 0, size b);
fun compare_sequences compare_g (a, b)
=
ps::compare_sequences compare_g (a, 0, size a, b, 0, size b);
fun has_alpha string = list::exists chr::is_alpha (explode string); # For efficiency, should really have string::exists and string::all someday. XXX SUCKO FIXME.
fun has_upper string = list::exists chr::is_upper (explode string);
fun has_lower string = list::exists chr::is_lower (explode string);
fun is_alpha string = length string > 0 and list::all chr::is_alpha (explode string);
fun is_upper string = length string > 0 and list::all chr::is_upper (explode string);
fun is_lower string = length string > 0 and list::all chr::is_lower (explode string);
fun is_mixed string = is_alpha string and has_upper string and has_lower string;
# String greater or equal
#
fun string_gt (a, b)
=
compare 0
where
al = size a;
bl = size b;
n = if (al < bl) al;
else bl;
fi;
fun compare i
=
if (i == n)
#
al > bl;
else
ai = unsafe_get (a, i);
bi = unsafe_get (b, i);
chr::(>) (ai, bi)
or
( (ai == bi)
and
compare (i+1)
);
fi;
end;
fun (<=) (a, b) = if (string_gt (a, b) ) FALSE; else TRUE; fi;
fun (<) (a, b) = string_gt (b, a);
fun (>=) (a, b)
=
b <= a;
my (>) = string_gt;
fun from_string' scan_char s
=
accum (0, [])
where
len = size s;
fun getc i
=
if (it::default_int::(<) (i, len))
#
THE (unsafe_get (s, i), i+1);
else
NULL;
fi;
scan_char = scan_char getc;
fun accum (i, chars)
=
case (scan_char i)
#
NULL
=>
if (it::default_int::(<) (i, len)) NULL; # Bad format
else THE (implode (list::reverse chars));
fi;
#
THE (c, i')
=>
accum (i', c ! chars);
esac;
end;
fun (+) ("", s) => s;
(+) (s, "") => s;
(+) (x, y) => ps::meld2 (x, y);
end;
# Concatenate a list of strings using the
# given separator and delimiter strings, so
# join' "(" " " ")" ["an", "example"]
# ->
# "(an example)"
#
fun join' _ _ _ [] => "";
#
join' start _ stop [x] => start + x + stop;
#
join' start sep stop (h ! t)
=>
cat (
start
!
h
!
fold_backward
(fn (x, l) = sep ! x ! l)
[ stop ]
t
);
end;
# Drop trailing newline on string, if present:
#
fun chomp ""
=>
"";
chomp string
=>
{ len = length string;
if (get (string, len - 1) != '\n') string;
else extract (string, 0, THE (len - 1));
fi;
};
end;
# There's a shorter definition of this fn in src/lib/compiler/toplevel/interact/read-eval-print-loop-g.pkg # -- should we use it instead? XXX BUGGO FIXME
to_lower = map chr::to_lower;
to_upper = map chr::to_upper;
fun to_mixed string # "THIS_is_tExt" -> "This_Is_Text"
=
to_mixed' (' ', explode string, [])
where
fun to_mixed' (_, [], chars)
=>
(implode (list::reverse chars));
to_mixed' (last, this ! rest, chars)
=>
if (not (chr::is_alpha this)) to_mixed' (this, rest, this ! chars);
elif (not (chr::is_alpha last)) to_mixed' (this, rest, chr::to_upper this ! chars);
else to_mixed' (this, rest, chr::to_lower this ! chars);
fi;
end;
end;
from_string = from_string' chr::scan;
to_string = translate chr::to_string;
from_cstring = from_string' chr::scan_c;
to_cstring = translate chr::to_cstring;
}; # package string
end;


