## dfa.pkg
#
# Deterministic finite-state machines.
# Compiled by:
#
src/lib/std/standard.libstipulate
package fil = file__premicrothread; # file__premicrothread is from
src/lib/std/src/posix/file--premicrothread.pkg package is = nfa::int_set;
package nfa = nfa; # nfa is from
src/lib/regex/backend/nfa.pkg package int_set_set
=
list_set_g (
Key = is::Set;
compare = is::compare;
);
package int2set
=
list_set_g (
#
Key = (Int, Int);
fun compare ((i1, i2), (j1, j2))
=
case (int::compare (i1, j1))
#
EQUAL => int::compare (i2, j2);
v => v;
esac;
);
package char_set
=
list_set_g (
Key = Char;
compare = char::compare;
);
package iss = int_set_set;
package i2 = int2set;
package cs = char_set;
package a2 = rw_matrix; # rw_matrix is from
src/lib/std/src/rw-matrix.pkg package rwv = rw_vector; # rw_vector is from
src/lib/std/src/rw-vector.pkg package map
=
list_map_g (
Key = is::Set;
compare = is::compare;
);
fun compare_char_option (NULL, NULL) => EQUAL;
compare_char_option (NULL, THE (c)) => LESS;
compare_char_option (THE (c), NULL) => GREATER;
compare_char_option (THE (c), THE (c')) => char::compare (c, c');
end;
herein
package dfa
: (weak) Dfa # Dfa is from
src/lib/regex/backend/dfa.api {
exception SYNTAX_NOT_HANDLED;
Move = MOVE (Int, Null_Or( Char ), Int);
package mvs # "mvs" == "move_set".
=
list_set_g (
#
Key = Move;
fun compare (MOVE (i, c, j), MOVE (i', c', j'))
=
case (int::compare (i, i'))
EQUAL => case (compare_char_option (c, c'))
EQUAL => int::compare (j, j');
v => v;
esac;
v => v;
esac;
);
# Create sets from lists
fun i_list l = is::add_list (is::empty, l);
fun m_list l = mvs::add_list (mvs::empty, l);
Dfa = DFA { states: is::Set,
moves: mvs::Set,
accepting: i2::Set,
table: a2::Rw_Matrix( Null_Or( Int ) ),
acc_table: rwv::Rw_Vector( Null_Or( Int ) ),
start_table: rwv::Rw_Vector( Bool )
};
fun print (DFA { states, moves, accepting, ... } )
=
{ pr = fil::print;
pr_i = fil::print o int::to_string;
pr_i2 = fil::print o (\\ (i1, i2) => int::to_string i1; end );
pr_c = fil::print o char::to_string;
pr ("States: 0 -> ");
pr_i (is::vals_count (states) - 1);
pr "\nAccepting:";
i2::apply (\\ k = { pr " "; pr_i2 k;})
accepting;
pr "\nMoves\n";
mvs::apply
\\ (MOVE (i, NULL, d))
=>
{ pr " ";
pr_i i;
pr " --@--> ";
pr_i d;
pr "\n";
};
(MOVE (i, THE c, d))
=>
{ pr " ";
pr_i i;
pr " --";
pr_c c;
pr "--> ";
pr_i d;
pr "\n";
};
end
moves;
};
fun move' moves (i, c)
=
case (mvs::find f moves)
#
THE (MOVE (s1, THE c', s2)) => THE s2;
NULL => NULL;
_ => raise exception DIE "Compiler bug: Unsupported case in move'";
esac
where
fun f (MOVE (s1, THE c', s2)) => (s1==i and c==c');
f _ => raise exception DIE "Compiler bug: Unsupported case in move'";
end;
end;
# fun move (DFA { moves, ... } ) (i, c) = move' moves (i, c)
fun move (DFA { table, ... } ) (i, c)
=
a2::get (table, (i, char::to_int (c)-char::to_int (char::min_char)));
fun accepting' accepting i
=
i2::fold_backward
(\\ ((s, n), NULL) => if (s==i) THE n;
else NULL;
fi;
((s, n), THE (n')) => if (s==i) THE n;
else THE n';
fi;
end)
NULL
accepting;
# fun accepting (DFA { accepting, ... } ) i
# =
# accepting' accepting i
fun accepting (DFA { acc_table, ... } ) i
=
rwv::get (acc_table, i);
fun can_start (DFA { start_table, ... } ) c
=
rwv::get (start_table, char::to_int (c));
fun build' nfa
=
{ move = nfa::move nfa;
accepting = nfa::accepting nfa;
start = nfa::start nfa;
chars = nfa::chars nfa;
fun get_all_chars ps
=
is::fold_forward
(\\ (s, cs) = cs::add_list (cs, chars s))
cs::empty ps;
init_chars = get_all_chars start;
fun get_all_states (ps, c)
=
is::fold_forward
(\\ (s, ss) = is::union (ss, move (s, c)))
is::empty ps;
fun loop ([], set, moves) => (set, moves);
loop (x ! xs, set, moves)
=>
{ cl = get_all_chars (x);
my (nstack, sdu, ml)
=
cs::fold_forward
(\\ (c, (ns, sd, ml))
=
{ u = get_all_states (x, c);
if ((not (iss::member (set, u))
and (not (iss::member (sd, u))))
)
( u ! ns,
iss::add (sd, u),
(x, c, u) ! ml
);
else
(ns, sd, (x, c, u) ! ml);
fi;
}
)
([], iss::empty,[])
cl;
loop (nstack@xs, iss::union (set, sdu), ml@moves);
};
end;
my (s_set, m_list)
=
loop ([start], iss::singleton (start), []);
num = REF 1;
fun new ()
=
{ n = *num;
num := n+1 ;
n;
};
s_map = map::set (map::empty, start, 0);
s_set' = iss::drop (s_set, start);
s_map = iss::fold_forward
(\\ (is, map) = map::set (map, is, new ()))
s_map
s_set';
states = is::add_list (is::empty, list::from_fn (*num, \\ x = x));
moves = mvs::add_list (mvs::empty,
map (\\ (is1, c, is2)
=
MOVE (the (map::get (s_map, is1)),
THE c,
the (map::get (s_map, is2)))
)
m_list);
# Given a set of accepting states, look for a given state,
# with the minimal corresponding pattern number
fun min_pattern acc_set
=
loop (tail l, head l)
where
l = map (the o accepting) (is::vals_list acc_set);
fun loop ([], min)
=>
min;
loop (n ! ns, min)
=>
if (n < min) loop (ns, n);
else loop (ns, min);
fi;
end;
end;
accept = iss::fold_forward
(\\ (is, cis)
=
{ items = is::filter
(\\ k
=
case (accepting k)
THE _ => TRUE;
NULL => FALSE;
esac
)
is;
if (is::is_empty items)
#
cis;
else
i2::add (cis, (the (map::get (s_map, is)),
min_pattern items));
fi;
}
)
i2::empty
s_set;
table = a2::from_fn
( (*num, char::to_int (char::max_char)-char::to_int (char::min_char)+1),
\\ (s, c) = move' moves (s, char::from_int (c+char::to_int (char::min_char)))
);
acc_table = rwv::from_fn
( *num,
\\ (s) = accepting' accept s
);
start_table = rwv::from_fn
( char::to_int (char::max_char) - char::to_int (char::min_char)+1,
\\ c = cs::member (init_chars, char::from_int (c+char::to_int (char::min_char)))
);
DFA { states, moves, accepting=>accept, table, acc_table, start_table };
};
fun build r
=
build' (nfa::build (r, 0));
fun build_pattern rs
=
build' (nfa::build_pattern rs);
};
end;
## COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies.
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.