# ***************************************************************************
# A regular expression matcher.
# Original Author: Ryan Stansifer <ryan@ponder.csci.unt.edu>
# ***************************************************************************
# Compiled by:
#
src/lib/tk/src/toolkit/regExp/sources.sublib# globber.pkg -- Regular expression matcher with globber-matcher syntax.
# see "man glob"
# Ryan Stansifer (ryan@cs.unt.edu) at Sat Sep 18 11:14:39 1993
# The function match takes a regular expression and
# matches it against a string. A regular expression has meta-symbols
# "*", "?", "{", "}", "\\", ", "
#
# Examples:
#
# Globber::match "dfg*fg { qwr, fgh }" "dfgbhjqwr";
#
# Globber::match "dfg*fg { qwr, fgh }" "dfgbhjfgqwr";
#
# Globber::match "dfg" "dfg";
package globber: (weak) Match { # Match is from
src/lib/tk/src/toolkit/regExp/match.api
exists = list::exists;
fun fold f l s = list::fold_backward f s l;
# Parsetree for regular expressions:
#
Leaf = CHAR Char
| ANY
| EOE
;
Rex = CAT (Rex, Rex) # Concatenation of two regular expressions
| EPSILON
# Denotes set containing empty string
| ALT (Rex, Rex)
| STAR Rex
| LEAF Leaf
;
stipulate
# Parser builders from Reade, page 216.
infixr my 50 & ;
infixr my 40
| ;
infix my 10 @@ ;
exception REJECT;
fun (p @@ f) s = { my (x, y) = p s; (f x, y); };
fun (p1 & p2) s = { my (x, s') = p1 s; (p2 @@ (\\ y => (x, y); end )) s'; };
fun (p1
| p2) s = (p1 s) except REJECT => (p2 s); end ;
fun optional pr = ((pr @@ (\\ x => THE x; end ))
| (\\ s => (NULL, s); end ));
/* [These are not used in the grammar for regular expressions, but
are useful in other grammars.]
# The argument s to sequence is critical to avoid infinite loop.
fun sequence pr s = (((pr & sequence pr)@@(op . ))
| (\\ s => ([], s))) s;
fun one_or_more pr = (pr & sequence pr) @@(op . )
*/
/*
The following grammar is used to parse strings into regular expressions.
rx ::= sx [ ", " rx ]
ax ::= char
ax ::= "*"
ax ::= "?"
ax ::= "{" rx "}"
ax ::= "\" meta
sx ::= ax [sx]
*/
# lexical base functions
fun
literal c (c1 . s) => if (c==c1 ) (c, s); else raise exception REJECT;fi;
literal c _ => raise exception REJECT; end;
fun is_meta c
=
exists (\\ x => x==c; end ) ['*', '?', '{', '}',
'\\', ','];
fun character (c . s) => if (is_meta c) raise exception REJECT; else (c, s);fi;
character (_) => raise exception REJECT;
end;
fun meta (c . s) => if (is_meta (c) ) (c, s); else raise exception REJECT;fi;
meta (_) => raise exception REJECT;
end;
char_meta = character
| ((literal '\\') & meta @@ (\\ (_, m) => m; end ));
suffix = ((literal '*')
|(literal '+')|(literal '?'));
# Attribute functions
fun
f (r, NULL) => r;
f (r, THE (_, s)) => alt (r, s); end ;
fun
h (r, NULL) => r;
h (r, (THE s)) => cat (r, s); end ;
# Cf grammar parsing functions
fun
st s = (sx @@ (\\ x = cat (x, leaf eoe))) s
also fun
rx s = (sx & (optional (literal ',' & rx)) @@ f) s
also fun
sx s = (ax & (optional sx) @@ h) s
also fun
ax s = (
(character @@ (\\ c = leaf (char c)))
|
((literal '*') @@ (\\ c = star (leaf (any))))
|
((literal '?') @@ (\\ c = leaf (any)))
|
((literal '{') & rx & (literal '}') @@ (\\ (_, (r, _)) = r))
|
((literal '\\')& meta @@ (\\ (_, c) = leaf (char c)))
) s;
herein
# Parse: parse a string, character by
# character into a regular expression
#
stipulate
fun f (x, NIL) => x;
f _ => raise exception REJECT;
end;
herein
exception BAD_EXPRESSION;
fun parse rex
=
f (st (explode rex))
except
REJECT = raise exception BAD_EXPRESSION;
end;
end;
# Compute the following positions
fun e_fn n
=
int_binary_set::empty;
fun update s (i, f) n
=
if (i == n) int_binary_set::union (s, f i);
else f n;
fi;
fun comp (f, g) n
=
int_binary_set::union (f n, g n);
# look_up -- find value associated with key in a list of pairs.
exception NOT_FOUND;
fun
look_up (x, NIL) => raise exception NOT_FOUND;
look_up (x, (key, value) . rest)=> if (x==key ) value; else look_up (x, rest);fi; end;
# dfs -- Compute:
# nullable
# first posion
# last postion
# mapping from dfs number to leaf element
# mapping from position to set of following positions
#
# See: Aho, Sethi, Ullman, section 3.9
#
fun dfs n (leaf x)
=>
(FALSE, n+1, int_binary_set::singleton n, int_binary_set::singleton n, [(n, x)], e_fn);
dfs n (epsilon)
=>
(TRUE, n, int_binary_set::empty, int_binary_set::empty, NIL, e_fn);
dfs n (star r)
=>
{ my (_, d1, f1, l1, t, w) = dfs n r;
follow = fold (update f1) (int_binary_set::vals_list l1) w;
(TRUE, d1, f1, l1, t, follow);
};
dfs n (cat (r, s))
=>
{ my (n1, d1, f1, l1, t1, w1) = dfs n r;
my (n2, d2, f2, l2, t2, w2) = dfs d1 s;
first = if n1 int_binary_set::union (f1, f2); else f1;fi;
last = if n2 int_binary_set::union (l1, l2); else l2;fi;
follow= fold (update f2) (int_binary_set::vals_list l1) (comp (w1, w2));
(n1 and n2, d2, first, last, t1@t2, follow);
};
dfs n (alt (r, s))
=>
{ my (n1, d1, f1, l1, t1, w1) = dfs n r;
my (n2, d2, f2, l2, t2, w2) = dfs d1 s;
nullable = n1 or n2;
follow = comp (w1, w2);
(nullable, d2, int_binary_set::union (f1, f2), int_binary_set::union (l1, l2), t1@t2, follow);
};
end;
Nfa_Type
=
{ start: int_binary_set::Set,
edges: vector::Vector( Leaf ),
trans: vector::Vector( int_binary_set::Set ),
final: Int
};
fun construct (_, b, c, _, e, f) : Nfa_Type
=
{ start => c,
edges => vector::from_fn (b, (\\ x= look_up (x, e))),
trans => vector::from_fn (b, f),
final => (b - 1)
};
fun next (s, a, NFA: Nfa_Type)
=
fold g pos int_binary_set::empty
where
mmm = NFA.edges;
nnn = NFA.trans;
fun
p (any, x) => TRUE;
p (y, x) => (x==y); end;
fun f (x, y)
=
if (p (vector::get (mmm, x), a) ) x . y; else y;fi;
pos = fold f (int_binary_set::vals_list s) [];
fun g (x, y)
=
int_binary_set::union (y, vector::get (nnn, x));
end;
stipulate
fun loop ( { final=>f, ... }, state, NIL)
=>
int_binary_set::member (state, f);
loop (NFA, state, h . t)
=>
(not (int_binary_set::equal (state, int_binary_set::empty)))
and
loop (NFA, (next (state, char h, NFA)), t);
end;
herein
fun interpret (NFA as { start=>s, ... }, x)
=
loop (NFA, s, (string::explode x));
end;
# A tip from La Monte H Yarroll <piggy@hilbert.maths.utas.edu.au> on
# Mon Apr 18 08:04:02 CDT 1994.
#
# fun match pat obj = interpret (construct (dfs 0 (parse pat)), obj);
#
# is much less efficient than:
#
fun match pat
=
{
nfa = construct (dfs 0 (parse pat));
\\ obj => interpret (nfa, obj); end ;
};
}; # package Rex