


## pre-string.pkg
# Compiled by:
# src/lib/core/init/init.cmi
# Some common operations that are used by
# both the 'string' and 'substring' packages.
stipulate
package rt = runtime; # runtime is from src/lib/core/init/built-in.pkg.
#
infix val 80 * / % mod div ;
infix val 70 $ ^ + - ;
infixr val 60 . ! @ << >> >>> ;
infix val 50 > < >= <= == != ;
infix val 40 := o ;
infix val 10 before ;
#
include base_types;
herein
package prestring {
# =========
#
stipulate
include pre_pervasive;
package c = inline_t::char; # inline_t is from src/lib/core/init/built-in.pkg my (+) = inline_t::default_int::(+);
my (-) = inline_t::default_int::(-);
my (*) = inline_t::default_int::(*);
my (quot) = inline_t::default_int::quot;
my (<) = inline_t::default_int::(<);
my (<=) = inline_t::default_int::(<=);
my (>) = inline_t::default_int::(>);
my (>=) = inline_t::default_int::(>=);
# my (==) = inline_t::(==);
unsafe_get = inline_t::vector_of_chars::get;
unsafe_set = inline_t::vector_of_chars::set;
unsafe_create = rt::asm::make_string; # "rt" == "runtime"
max_size = core::max_length;
size = inline_t::vector_of_chars::length;
herein
# Allocate an uninitialized string of given length (with a size check)
fun create n
=
if (inline_t::default_int::ltu (max_size, n))
raise exception core::SIZE;
else
unsafe_create n;
fi;
# A vector of single character strings
#
chars = rt::asm::make_typeagnostic_ro_vector (c::max_ord+1, next 0) # "rt" == "runtime".
where
fun next i
=
if (i <= c::max_ord)
#
s = unsafe_create 1;
unsafe_set (s, 0, c::chr i);
s ! next (i+1);
else
[];
fi;
end;
fun unsafe_substring (_, _, 0)
=>
"";
unsafe_substring (s, i, 1)
=>
inline_t::poly_vector::get (chars, inline_t::cast (unsafe_get (s, i)));
unsafe_substring (s, i, n)
=>
ss
where
ss = unsafe_create n;
fun copy j
=
if (j != n)
unsafe_set (ss, j, unsafe_get (s, i+j));
copy (j+1);
fi;
copy 0;
end;
end;
# Concatenate a pair of non-empty strings:
#
fun meld2 (x, y)
=
ss
where
xl = size x;
yl = size y;
ss = create (xl+yl);
fun copyx n
=
if (n != xl)
#
unsafe_set (ss, n, unsafe_get (x, n));
copyx (n+1);
fi;
fun copyy n
=
if (n != yl)
#
unsafe_set (ss, xl+n, unsafe_get (y, n));
copyy (n+1);
fi;
copyx 0;
copyy 0;
end;
# Given a reverse order list of strings
# and a total length, return the
# concatenation of the list.
#
fun rev_meld (0, _)
=>
"";
rev_meld (1, lst)
=>
find lst
where
fun find ("" ! r) => find r;
find (s ! _) => s;
find _ => ""; # * impossible *
end;
end;
rev_meld (tot_len, lst)
=>
ss
where
ss = create tot_len;
fun copy ([], _)
=>
();
copy (s ! r, i)
=>
{ len = size s;
i = i - len;
fun copy' j
=
if (j != len)
#
unsafe_set (ss, i+j, unsafe_get (s, j));
copy'(j+1);
fi;
copy' 0;
copy (r, i);
};
end;
copy (lst, tot_len);
end;
end;
# Map a translation function across
# the characters of a substring:
#
fun translate (tr, s, i, n)
=
map_list (i, 0, [])
where
stop = i+n;
fun map_list (j, result_length, result_list)
=
if (j < stop)
s' = tr (unsafe_get (s, j));
map_list (j + 1, result_length + size s', s' ! result_list);
else
rev_meld (result_length, result_list);
fi;
end;
# Implode a non-empty list of characters
# into a string where both the length and
# list are given as arguments:
#
fun implode (len, cl)
=
ss
where
ss = create len;
fun copy ([], _)
=>
();
copy (c ! r, i)
=>
{ inline_t::vector_of_chars::set (ss, i, c);
copy (r, i+1);
};
end;
copy (cl, 0);
end;
# Implode a reversed non-empty list of characters
# into a string where both the length and list
# are given as arguments:
#
fun rev_implode (len, cl)
=
ss
where
ss = create len;
fun copy ([], _) => ();
copy (c ! r, i) => { inline_t::vector_of_chars::set (ss, i, c);
copy (r, i - 1);
};
end;
copy (cl, len - 1);
end;
fun is_prefix (s1, s2, i2, n2)
=
(n2 >= n1 and eq (0, i2))
where
n1 = size s1;
fun eq (i, j)
=
i >= n1
or
( unsafe_get (s1, i) == unsafe_get (s2, j)
and
eq (i+1, j+1)
);
end;
fun collate compare_fn (s1, i1, n1, s2, i2, n2)
=
compare' 0
where
my (n, order)
=
if (n1 == n2) (n1, EQUAL);
elif (n1 < n2) (n1, LESS);
else (n2, GREATER);
fi;
fun compare' i
=
if (i == n)
order;
else
c1 = unsafe_get (s1, i1+i);
c2 = unsafe_get (s2, i2+i);
case (compare_fn (c1, c2))
EQUAL => compare' (i+1);
order => order;
esac;
fi;
end;
fun compare (s1, i1, n1, s2, i2, n2)
=
collate compare_fn (s1, i1, n1, s2, i2, n2)
where
fun compare_fn (c1, c2)
=
if (c1 == c2) EQUAL;
elif (c::(>) (c1, c2)) GREATER;
else LESS;
fi;
end;
# Knuth-Morris-Pratt String Matching
#
# my kmp: String -> (String, Int, Int) -> Null_Or( Int )
#
# Find the first string within the second, starting at and
# ending before the given positions.
# Return the starting position of the match
# or the given ending position if there is no match.
#
fun kmp pattern
=
{ psz = size pattern;
next = inline_t::poly_rw_vector::array (psz, -1);
fun pat x = unsafe_get (pattern, x);
fun nxt x = inline_t::poly_rw_vector::get (next, x);
fun setnxt (i, x)
=
inline_t::poly_rw_vector::set (next, i, x);
# trying to fill next at position p (> 0) and higher;
# invariants: x >= 0
# pattern[0..x) = pattern[p-x..p)
# for i in [0..p) :
# pattern[i] <> pattern[next[i]]
# pattern[0..next[i]) = pattern[i-next[i]..i)
fun fill (p, x)
=
if (p >= psz ) ();
elif (pat x == pat p) dnxt (p, nxt x, x + 1);
else dnxt (p, x, nxt x + 1);
fi
also
fun dnxt (p, x, y)
=
{ setnxt (p, x);
fill (p + 1, y);
};
# Once next has been initialized, it serves the following purpose:
# Suppose we are looking at text position t and pattern position
# p. This means that all pattern positions < p have already
# matched the text positions that directly precede t.
# Now, if the text[t] matches pattern[p], then we simply advance
# both t and p and try again.
# However, if the two do not match, then we simply
# try t again, but this time with the pattern position
# given by next[p].
# Success is when p reaches the end of the pattern, failure is
# when t reaches the end of the text without p having reached the
# end of the pattern.
#
fun search (text, start, tsz)
=
{ fun txt x = unsafe_get (text, x);
fun loop (p, t)
=
if (p >= psz ) t - psz;
elif (t >= tsz ) tsz;
elif (p < 0 or pat p == txt t ) loop (p+1, t+1);
else loop (nxt p, t);
fi;
loop (0, start);
};
fill (1, 0);
search;
};
end; # stipulate
}; # prestring
end;


