## nfa.pkg
# Compiled by:
#
src/lib/std/standard.lib# Non-deterministic and deterministic finite-state machines.
### "It is difficult to extract sense from strings,
### but they are the only communication coin we can count on."
###
### -- Alan Perlis
stipulate
package are = abstract_regular_expression; # abstract_regular_expression is from
src/lib/regex/front/abstract-regular-expression.pkg package fil = file__premicrothread; # file__premicrothread is from
src/lib/std/src/posix/file--premicrothread.pkgherein
package nfa
: (weak) Nfa # Nfa is from
src/lib/regex/backend/nfa.api {
exception SYNTAX_NOT_HANDLED;
Move = MOVE (Int, Null_Or(Char), Int);
fun compare_char_option (NULL, NULL ) => EQUAL;
compare_char_option (NULL, THE (c)) => LESS;
compare_char_option (THE (c), NULL) => GREATER;
compare_char_option (THE (c), THE (c'))
=>
char::compare (c, c');
end;
package int_set
=
list_set_g (
Key = Int;
compare = int::compare;
);
package int2set
=
list_set_g (
Key = (Int, Int);
fun compare ((i1, i2), (j1, j2))
=
case (int::compare (i1, j1))
EQUAL => int::compare (i2, j2);
v => v;
esac;
);
package move_set
=
list_set_g (
Key = Move;
fun compare (MOVE (i, c, j), MOVE (i', c', j'))
=
case (int::compare (i, i'))
EQUAL => case (compare_char_option (c, c'))
EQUAL => int::compare (j, j');
v => v;
esac;
v => v;
esac;
);
package char_set
=
list_set_g (
Key = Char;
compare = char::compare;
);
package i = int_set;
package i2 = int2set;
package m = move_set;
package c = char_set;
# Create sets from lists
#
fun i_list l = i::add_list (i::empty, l);
fun m_list l = m::add_list (m::empty, l);
Nfa = NFA { states: i::Set,
moves: m::Set,
accepting: i2::Set
};
fun print (NFA { states, moves, accepting } )
=
{ pr = fil::print;
pr_i = fil::print o int::to_string;
pr_i2 = fil::print o (\\ (i1, i2) => (int::to_string i1); end );
pr_c = fil::print o char::to_string;
pr ("States: 0 -> ");
pr_i (i::vals_count (states) - 1);
pr "\nAccepting:";
i2::apply (\\ k => { pr " "; pr_i2 k;}; end ) accepting;
pr "\nMoves\n";
m::apply
\\ (MOVE (i, NULL, d)) => { pr " ";
pr_i i;
pr " --@--> ";
pr_i d;
pr "\n";
};
(MOVE (i, THE c, d)) => { pr " ";
pr_i i;
pr " --";
pr_c c;
pr "--> ";
pr_i d;
pr "\n";
};
end
moves;
};
fun null_accept n
=
NFA { states=>i_list [0, 1], moves=>m::add (m::empty, MOVE (0, NULL, 1)),
accepting=>i2::singleton (1, n) };
fun null_refuse n
=
NFA { states=>i_list [0, 1], moves=>m::empty,
accepting=>i2::singleton (1, n) };
fun renumber n st = n + st;
fun renumber_move n (MOVE (s, c, s')) = MOVE (renumber n s, c, renumber n s');
fun renumber_acc n (st, n') = (n+st, n');
fun build' n (are::GROUP e)
=>
build' n e;
build' n (are::ALT l)
=>
fold_backward
(\\ (NFA { states=>s1, moves=>m1, ... },
NFA { states=>s2, moves=>m2, ... }
)
=
{ k1 = i::vals_count s1;
k2 = i::vals_count s2;
s1' = i::map (renumber 1) s1;
s2' = i::map (renumber (k1+1)) s2;
m1' = m::map (renumber_move 1) m1;
m2' = m::map (renumber_move (k1+1)) m2;
NFA { states => i::add_list (i::union (s1', s2'),
[0, k1+k2+1]),
moves => m::add_list (m::union (m1', m2'),
[MOVE (0, NULL, 1),
MOVE (0, NULL, k1+1),
MOVE (k1, NULL, k1+k2+1),
MOVE (k1+k2, NULL, k1+k2+1)]),
accepting => i2::singleton (k1+k2+1, n)
};
}
)
(null_refuse n)
(map (build' n) l);
build' n (are::CONCAT l)
=>
fold_backward
(\\ (NFA { states=>s1, moves=>m1, ... },
NFA { states=>s2, moves=>m2, accepting }
)
=
{ k = i::vals_count s1 - 1;
s2' = i::map (renumber k) s2;
m2' = m::map (renumber_move k) m2;
accepting' = i2::map (renumber_acc k) accepting;
NFA { states=>i::union (s1, s2'),
moves=>m::union (m1, m2'),
accepting=>accepting'
};
}
)
(null_accept n)
(map (build' n) l);
build' n (are::INTERVAL (e, n1, n2)) => raise exception SYNTAX_NOT_HANDLED;
build' n (are::OPTION e) => build' n (are::ALT [are::CONCAT [], e]);
build' n (are::PLUS e)
=>
{ my (NFA { states, moves, ... } ) = build' n e;
m = i::vals_count states;
NFA { states=>i::add (states, m),
moves=>m::add_list (moves, [MOVE (m - 1, NULL, m),
MOVE (m - 1, NULL, 0)]),
accepting=>i2::singleton (m, n)
};
};
build' n (are::STAR e) => build' n (are::ALT [are::CONCAT [], are::PLUS e]);
build' n (are::MATCH_SET s)
=>
if (are::char_set::is_empty s)
#
null_accept n;
else
moves = are::char_set::fold_forward
(\\ (c, move_set)
=
m::add (move_set, MOVE (0, THE c, 1)))
m::empty
s;
NFA { states=>i_list [0, 1],
moves,
accepting=>i2::singleton (1, n)
};
fi;
build' n (are::NONMATCH_SET s)
=>
{ moves = are::char_set::fold_forward
(\\ (c, move_set) = m::add (move_set, MOVE (0, THE c, 1)))
m::empty
(are::char_set::difference (are::all_chars, s));
NFA { states=>i_list [0, 1],
moves,
accepting=>i2::singleton (1, n)
};
};
build' n (are::CHAR c)
=>
NFA { states => i_list [0, 1],
moves => m::singleton (MOVE (0, THE c, 1)),
accepting => i2::singleton (1, n)
};
# We itemize instead of using a wildcard so that
# a fresh warning will be generated if a new terminal
# is added to the Abstract_Regular_Expression sumtype:
#
build' n (are::BEGIN) => raise exception SYNTAX_NOT_HANDLED;
build' n (are::END) => raise exception SYNTAX_NOT_HANDLED;
build' n (are::ASSIGN _) => raise exception SYNTAX_NOT_HANDLED;
build' n (are::BACK_REF _) => raise exception SYNTAX_NOT_HANDLED;
build' n (are::GUARD _) => raise exception SYNTAX_NOT_HANDLED;
build' n (are::BOUNDARY _) => raise exception SYNTAX_NOT_HANDLED;
end;
fun build (r, n)
=
{ my (NFA { states, moves, accepting } )
=
build' n r;
# Clean up the nfa to remove epsilon moves.
# A simple way to do this:
# 1. states= { 0 }, moves= {}
# 2. for every s in states,
# 3. compute closure (s)
# 4. for any move (i, c, o) with i in closure (s)
# 5. add move (0, c, o) to moves
# 6. add state o to states
# 7. repeat until no modifications to states and moves
NFA { states, moves, accepting };
};
fun build_pattern rs
=
{ fun loop ([], _) => [];
loop (r ! rs, n)
=>
(build (r, n)) ! (loop (rs, n+1));
end;
rs' = loop (rs, 0);
renums = fold_backward
(\\ (NFA { states, ... }, acc)
=
1 ! (map (\\ k = k+i::vals_count states)
acc
) )
[]
rs';
news
=
paired_lists::map
( \\ (NFA { states, moves, accepting }, renum)
=
{ new_states = i::map (renumber renum) states;
new_moves = m::map (renumber_move renum) moves;
make_access = i2::map (renumber_acc renum) accepting;
NFA { states => new_states,
moves => new_moves,
accepting => make_access
};
}
)
(rs', renums);
my (states, moves, accepting)
=
fold_forward
( \\ ( NFA { states, moves, accepting },
(acc_s, acc_m, acc_a)
)
=
( i::union (states, acc_s),
m::union (moves, acc_m),
i2::union (accepting, acc_a)
)
)
( i::singleton 0,
m::add_list (
m::empty,
map (\\ k = MOVE (0, NULL, k)) renums
),
i2::empty
)
news;
NFA { states, moves, accepting };
};
fun accepting (NFA { accepting, ... } ) state
=
{ item = i2::find
(\\ (i, _) = (i==state))
accepting;
case item
THE (s, n) => THE (n);
NULL => NULL;
esac;
};
# Compute possible next states
# from orig with character c
#
fun one_move (NFA { moves, ... } ) (orig, char)
=
m::fold_backward
\\ (MOVE (_, NULL, _), set)
=>
set;
(MOVE (or_op, THE c, d), set)
=>
if (c==char and or_op==orig) i::add (set, d);
else set;
fi;
end
i::empty
moves;
fun closure (NFA { moves, ... } ) orig_set
=
loop orig_set
where
fun add_state (MOVE (orig, NULL, dest), (b, states))
=>
if ( i::member (states, orig) and
not (i::member (states, dest))
)
(TRUE, i::add (states, dest));
else
(b, states);
fi;
add_state (_, bs) => bs;
end;
fun loop (states)
=
{ my (modified, new)
=
m::fold_backward
add_state
(FALSE, states)
moves;
if modified loop (new);
else new; fi;
};
end;
fun move nfa
=
{ closure = closure nfa;
one_move = one_move nfa;
closure o one_move;
};
fun start nfa
=
closure nfa (i::singleton 0);
fun chars (NFA { moves, ... } ) state
=
{ fun f (MOVE (s1, THE c, s2), s)
=>
if (s1 == state)
c::add (s, c);
else
s;
fi;
f (_, s) => s;
end;
c::vals_list (m::fold_forward f c::empty moves);
};
};
end;