## substring.pkg
# Compiled by:
# src/lib/core/init/init.cmi
### "There has never been an intelligent person of the age of sixty
### who would consent to live his life over again.
###
### "His or anyone else's."
###
### -- Mark Twain,
### Letters from the Earth
stipulate
package it = inline_t; # inline_t is from
src/lib/core/init/built-in.pkg package ps = protostring; # protostring is from
src/lib/core/init/protostring.pkg #
include package base_types; # base_types is from
src/lib/core/init/built-in.pkg #
infix my 80 * / % mod div ;
infix my 70 $ ^ + - ;
infix my 40 := o ;
infix my 50 > < >= <= != == ;
infixr my 60 . ! @ ;
infix my 10 then ;
herein
package substring
: Substring # Substring is from
src/lib/core/init/substring.api where Char == base_types::Char
where String == base_types::String
=
package {
#
include package proto_pervasive; # proto_pervasive is from
src/lib/core/init/proto-pervasive.pkg package w= it::default_unt; # inline_t is from
src/lib/core/init/built-in.pkg my (+) = it::default_int::(+);
my (-) = it::default_int::(-);
my (<) = it::default_int::(<);
my (<=) = it::default_int::(<=);
my (>) = it::default_int::(>);
my (>=) = it::default_int::(>=);
# my (==) = it::(==);
unsafe_sub = it::vector_of_chars::get_byte_as_char;
string_size = it::vector_of_chars::length;
# list reverse
#
fun reverse ([], l) => l;
reverse (x ! r, l) => reverse (r, x ! l);
end;
Char = base_types::Char;
String = base_types::String;
Substring
=
SUBSTRING (String, Int, Int);
fun burst_substring (SUBSTRING arg)
=
arg;
fun to_string (SUBSTRING arg)
=
ps::unsafe_substring arg;
# NOTE: we use words to check the right bound
# so as to avoid raising overflow.
#
fun make_substring (s, i, n)
=
if (((i < 0) or (n < 0)
or w::(<) (w::from_int (string_size s), w::(+) (w::from_int i, w::from_int n)))
)
raise exception core::INDEX_OUT_OF_BOUNDS;
else
SUBSTRING (s, i, n);
fi;
fun extract (s, i, NULL)
=>
{ len = string_size s;
if ((0 <= i) and (i <= len))
SUBSTRING (s, i, len - i);
else
raise exception core::INDEX_OUT_OF_BOUNDS;
fi;
};
extract (s, i, THE n)
=>
make_substring (s, i, n);
end;
fun from_string s
=
SUBSTRING (s, 0, string_size s);
fun is_empty (SUBSTRING(_, _, 0)) => TRUE;
is_empty _ => FALSE;
end;
fun getc (SUBSTRING (s, i, 0)) => NULL;
getc (SUBSTRING (s, i, n)) => THE (unsafe_sub (s, i), SUBSTRING (s, i+1, n - 1));
end;
fun first (SUBSTRING (s, i, 0)) => NULL;
first (SUBSTRING (s, i, n)) => THE (unsafe_sub (s, i));
end;
fun drop_first k (SUBSTRING (s, i, n))
=
{ if (k < 0 ) raise exception core::INDEX_OUT_OF_BOUNDS; fi;
#
if (k >= n) SUBSTRING (s, i+n, 0);
else SUBSTRING (s, i+k, n-k);
fi;
};
fun drop_last k (SUBSTRING (s, i, n))
=
{ if (k < 0) raise exception core::INDEX_OUT_OF_BOUNDS; fi;
#
if (k >= n) SUBSTRING (s, i, 0);
else SUBSTRING (s, i, n-k);
fi;
};
fun get (SUBSTRING (s, i, n), j)
=
{ if (inline_t::default_int::geu (j, n)) raise exception core::INDEX_OUT_OF_BOUNDS; fi;
#
unsafe_sub (s, i+j);
};
fun size (SUBSTRING(_, _, n))
=
n;
fun make_slice (SUBSTRING (s, i, n), j, NULL)
=>
{ if (j < 0 or j > n) raise exception core::INDEX_OUT_OF_BOUNDS; fi;
#
SUBSTRING (s, i+j, n-j);
};
make_slice (SUBSTRING (s, i, n), j, THE m)
=>
{ # NOTE: we use words to check the right bound
# so as to avoid raising overflow.
#
if (((j < 0)
or (m < 0)
or w::(<) (w::from_int n, w::(+) (w::from_int j, w::from_int m)))
)
raise exception core::INDEX_OUT_OF_BOUNDS;
fi;
SUBSTRING (s, i+j, m);
};
end;
fun cat ssl # Concatenate a list of substrings.
=
ps::rev_meld (length (0, [], ssl))
where
fun length (len, sl, [])
=>
(len, sl);
length (len, sl, (SUBSTRING (s, i, n) ! rest))
=>
length (len + n, ps::unsafe_substring (s, i, n) ! sl, rest);
end;
end;
# Concatenate a list of substrings using the
# given separator string:
#
fun join _ [] => "";
join _ [x] => to_string x;
join sep (h ! t)
=>
{ sep' = from_string sep;
fun loop ([], l) => cat (reverse (l, []));
loop (h ! t, l) => loop (t, h ! sep' ! l);
end;
loop (t, [h]);
};
end;
fun join' _ _ _ [] => "";
join' start _ stop [x] => cat [ (from_string start), x, (from_string stop) ]; # XXX BUGGO FIXME there's likely a better expression here.
join' start sep stop (h ! t)
=>
{ sep' = from_string sep;
fun loop ([], l) => cat (reverse (l, [from_string stop]));
loop (h ! t, l) => loop (t, h ! sep' ! l);
end;
loop (t, [h, from_string start]);
};
end;
# Explode a substring into a list of characters
#
fun explode (SUBSTRING (s, i, n))
=
{ fun f (l, j)
=
if (j < i)
l;
else
f (unsafe_sub (s, j) ! l, j - 1);
fi;
f (NIL, (i + n) - 1);
};
# substring comparisons
#
fun is_prefix s1 (SUBSTRING (s2, i2, n2))
=
ps::is_prefix (s1, s2, i2, n2);
fun is_suffix s1 (SUBSTRING (s2, i2, n2))
=
ps::is_prefix (s1, s2, i2 + n2 - string_size s1, n2);
fun is_substring s
=
search
where
stringsearch = ps::knuth_morris_pratt_string_match s;
#
fun search (SUBSTRING (s', i, n))
=
{ epos = i + n;
#
stringsearch (s', i, epos) < epos;
};
end;
fun compare (SUBSTRING (s1, i1, n1), SUBSTRING (s2, i2, n2))
=
ps::compare (s1, i1, n1, s2, i2, n2);
fun compare_sequences compare_g (SUBSTRING (s1, i1, n1), SUBSTRING (s2, i2, n2))
=
ps::compare_sequences compare_g (s1, i1, n1, s2, i2, n2);
fun split_at (SUBSTRING (s, i, n), k)
=
{ if (it::default_int::ltu (n, k)) raise exception core::INDEX_OUT_OF_BOUNDS; fi;
#
( SUBSTRING (s, i, k),
SUBSTRING (s, i+k, n-k)
);
};
stipulate
# Call 'chop' on the longest prefix of substring
# for which 'predicate' is true of each character:
#
fun scan_from_left chop predicate (SUBSTRING (s, i, n))
=
chop (s, i, n, scan i - i)
where
stop = i + n;
#
fun scan j
=
if (j != stop and predicate (unsafe_sub (s, j)))
#
scan (j+1);
else
j;
fi;
end;
# Call 'chop' on the longest suffix of substring
# for which 'predicate' is true of each character:
#
fun scan_from_right chop predicate (SUBSTRING (s, i, n))
=
{ stop = i - 1;
fun scan j
=
if (j != stop and predicate (unsafe_sub (s, j)))
scan (j - 1);
else
j;
fi;
chop (s, i, n, (scan (i+n - 1) - i) + 1);
};
herein
# Return the longest prefix/suffix
# whose chars each satisfy predicate.
#
# These have type (Char -> Bool) -> Substring -> Substring
#
get_prefix = scan_from_left (\\ (s, i, n, k) = SUBSTRING (s, i, k));
get_suffix = scan_from_right (\\ (s, i, n, k) = SUBSTRING (s, i+k, n-k));
# Opposite of above: return all of string
# except longest prefix/suffix whose chars
# satisfy predicate.
#
# These also have type (Char -> Bool) -> Substring -> Substring
#
drop_prefix = scan_from_left (\\ (s, i, n, k) = SUBSTRING (s, i+k, n-k));
drop_suffix = scan_from_right (\\ (s, i, n, k) = SUBSTRING (s, i, k));
# Split substring into two substrings:
# First is the longest prefix whose chars
# all satisfy given predicate, second is the rest:
#
# This has type (Char -> Bool) -> Substring -> (Substring, Substring)
#
split_off_prefix
=
scan_from_left
(\\ (s, i, n, k) = (SUBSTRING (s, i, k), SUBSTRING (s, i+k, n-k)));
# Converse of above: Split substring into
# two substrings, second of which is the
# longest suffix whose chars all satisfy
# given predicate, first of which is the rest:
#
# This also has type (Char -> Bool) -> Substring -> (Substring, Substring)
#
split_off_suffix = scan_from_right (\\ (s, i, n, k) = (SUBSTRING (s, i, k), SUBSTRING (s, i+k, n-k)));
end; # with
fun position s # This is using the Knuth-Morris-Pratt matcher from protostring.
=
search
where
stringsearch = ps::knuth_morris_pratt_string_match s;
fun search (ss as SUBSTRING (s', i, n))
=
{ epos = i + n;
match = stringsearch (s', i, epos);
(SUBSTRING (s', i, match - i), SUBSTRING (s', match, epos - match));
};
end;
fun span (SUBSTRING (s1, i1, n1), SUBSTRING (s2, i2, n2))
=
{ if (s1 != s2
or i1 > i2 + n2
)
raise exception SPAN;
fi;
SUBSTRING (s1, i1, (i2+n2)-i1);
};
fun translate tr (SUBSTRING (s, i, n))
=
ps::translate (tr, s, i, n);
fun tokens is_delim (SUBSTRING (s, i, n))
=
{ stop = i+n;
fun substr (i, j, l)
=
if (i == j)
l;
else
SUBSTRING (s, i, j-i) ! l;
fi;
fun scan_tok (i, j, toks)
=
if (j < stop)
if (is_delim (unsafe_sub (s, j)))
skip_sep (j+1, substr (i, j, toks));
else scan_tok (i, j+1, toks); fi;
else
substr (i, j, toks);
fi
also
fun skip_sep (j, toks)
=
if (j < stop)
if (is_delim (unsafe_sub (s, j)))
skip_sep (j+1, toks);
else scan_tok (j, j+1, toks); fi;
else
toks;
fi;
reverse (scan_tok (i, i, []), []);
};
fun fields is_delim (SUBSTRING (s, i, n))
=
{ stop = i+n;
fun substr (i, j, l)
=
SUBSTRING (s, i, j-i) ! l;
fun scan_tok (i, j, toks)
=
if (j < stop)
if (is_delim (unsafe_sub (s, j)))
scan_tok (j+1, j+1, substr (i, j, toks));
else scan_tok (i, j+1, toks); fi;
else
substr (i, j, toks);
fi;
reverse (scan_tok (i, i, []), []);
};
fun fold_forward f init (SUBSTRING (s, i, n))
=
iter (i, init)
where
stop = i+n;
fun iter (j, accum)
=
if (j < stop)
iter (j+1, f (unsafe_sub (s, j), accum));
else
accum;
fi;
end;
fun fold_backward f init (SUBSTRING (s, i, n))
=
iter (i+n - 1, init)
where
fun iter (j, accum)
=
if (j >= i)
iter (j - 1, f (unsafe_sub (s, j), accum));
else
accum;
fi;
end;
fun apply f (SUBSTRING (s, i, n))
=
iter i
where
stop = i + n;
fun iter j
=
if (j < stop)
f (unsafe_sub (s, j));
iter (j+1);
fi;
end;
}; # package substring.
end;