## lex-fn.pkg
## John Reppy (http://www.cs.uchicago.edu/~jhr)
## Aaron Turon (adrassi@gmail.com)
## All rights reserved.
# Compiled by:
#
src/app/future-lex/src/lexgen.lib# DFA generation using RE derivatives
package lex_fn
: (weak)
api {
gen: lex_spec::Spec
->
lex_output_spec::Spec;
}
{
package re = regular_expression; # regular_expression is from
src/app/future-lex/src/regular-expression.pkg package sis = regular_expression::symbol_set;
package lo = lex_output_spec; # lex_output_spec is from
src/app/future-lex/src/backends/lex-output-spec.pkg package map
=
red_black_map_g ( # red_black_map_g is from
src/lib/src/red-black-map-g.pkg package {
Key = vector::Vector( re::Re );
compare = vector::compare_sequences re::compare;
}
);
# Given a list of RE vectors (start states), produce a DFA recognizer:
# NOTE: invoked once per start state (each start state has a DFA)
#
fun make_dfa start_vecs
=
{ n = REF 0; # next state id
states = REF [];
# Return the state that the re vector maps to and
# a flag set to TRUE if the state is new.
fun make_state (state_map, result, as_ss)
=
case (map::get (state_map, result))
NULL => {
id = *n;
fun add_final (idx, re, finals) =
if (re::nullable re)
idx ! finals;
else finals;fi;
q = lo::STATE {
id, start_state => as_ss, label => result,
final => vector::keyed_fold_backward add_final [] result,
next => REF []
};
n := id+1;
states := q ! *states;
(TRUE, q, map::set (state_map, result, q));
};
THE q => (FALSE, q, state_map);
esac;
fun init_iter (states, state_map, [])
=>
(list::reverse states, state_map);
init_iter (states, state_map, vec ! vecs)
=>
{ my (_, q, state_map') = make_state (state_map, vec, TRUE);
init_iter (q ! states, state_map', vecs);
};
end;
my (init_states, init_statemap)
=
init_iter ([], map::empty, start_vecs);
fun f (state_map, [])
=>
state_map;
f (state_map, lo::STATE { next, label, ... } ! work_list)
=>
{ fun move ((result, edge), (state_map, work_list))
=
if (vector::all re::is_none result) # if error transition
(state_map, work_list);
else
my (is_new, q, state_map)
=
make_state (state_map, result, FALSE);
next := (edge, q) ! *next;
if is_new (state_map, q ! work_list);
else (state_map, work_list); fi;
fi;
edges = re::derivatives label;
f (list::fold_forward move (state_map, work_list) edges);
};
end;
ignore (f (init_statemap, init_states));
(init_states, list::reverse *states);
};
# Clamp a machine to the right character set:
#
fun clamp clamp_to states
=
{ ascii127
=
sis::interval (0u0, 0u127);
fun clamp_trans (edge, q)
=
(sis::intersect (ascii127, edge), q);
fun clamp_state (lo::STATE { next, ... } )
=
next := list::map clamp_trans *next;
list::apply clamp_state states;
states;
};
fun gen spec
=
{
# TODO: check for invalid start states on rules XXX BUGGO
my lex_spec::SPEC { decls, conf, rules } = spec;
my lex_spec::CONF { struct_name, header,
arg, start_states, ... } = conf;
start_states' = quickstring_set::add (start_states, quickstring__premicrothread::from_string "INITIAL");
#
# # Split out actions and associate each ruleSpec to an action ID
# #
# # Note: matchActions tries to find textually idential actions and map
# # them to the same entry in the action vector
# #
# fun matchActions rules = let
# fun iter ((ruleSpec, action) ! rules,
# ruleSpecs, actions, actionMap, n) = let
# key = quicstring::from_string action
# my (i, actions', actionMap', n') =
# case quickstring_map::get (actionMap, key)
# of NULL => (n, action ! actions,
# quickstring_map::set (actionMap, key, n),
# n+1)
#
| THE i => (i, actions, actionMap, n)
# in
# iter (rules, (i, ruleSpec) ! ruleSpecs,
# actions', actionMap', n')
# end
#
| iter ([], ruleSpecs, actions, _, _) =
# (list::reverse ruleSpecs, list::reverse actions)
# in
# iter (rules, [], [], quickstring_map::empty, 0)
# end
# my (ruleSpecs, actions) = matchActions rules
my (rule_specs, actions) = paired_lists::unzip rules;
actions_vec = vector::from_list actions;
start_states = quickstring_set::vals_list start_states';
fun ssvec label
=
{ fun has_rule (NULL, re)
=>
re;
has_rule (THE ss, re)
=>
if (quickstring_set::member (ss, label))
re;
else regular_expression::none;
fi;
end;
rules = list::map has_rule rule_specs;
vector::from_list rules;
};
my (init_states, states)
=
make_dfa (list::map ssvec start_states);
lo::SPEC {
decls,
header => if (string::length_in_bytes header == 0)
"package " +
if (string::length_in_bytes struct_name == 0)
"Mlex";
else
struct_name;
fi;
else
header;
fi,
arg,
actions => actions_vec,
dfa => clamp lex_spec::CLAMP127 states,
start_states => paired_lists::zip
(list::map quickstring__premicrothread::to_string start_states,
init_states)
};
};
};