## sml-fun-output.pkg
# Compiled by:
#
src/app/future-lex/src/lexgen.lib# Code generation for SML, using control-flow
### "Chaos theory is not nearly as exciting as it sounds. How could it be?"
###
### -- Stephen Kellert
stipulate
package fil = file__premicrothread; # file__premicrothread is from
src/lib/std/src/posix/file--premicrothread.pkg package lms = list_mergesort; # list_mergesort is from
src/lib/src/list-mergesort.pkg package lo = lex_output_spec; # lex_output_spec is from
src/app/future-lex/src/backends/lex-output-spec.pkg package re = regular_expression; # regular_expression is from
src/app/future-lex/src/regular-expression.pkg package sis = regular_expression::symbol_set;
package sym = re::sym;
herein
package smlfun_output
: (weak) Output # Output is from
src/app/future-lex/src/backends/output.api {
Ml_Exp == ml::Ml_Exp;
Ml_Pat == ml::Ml_Pat;
inp = "inp";
inp_variable = ML_VAR inp;
fun id_of (lo::STATE { id, ... } )
=
id;
fun name_of' i = "yyQ" + (int::to_string i);
fun name_of s = name_of' (id_of s);
fun act_name i = "yyAction" + (int::to_string i);
# Simple heuristic to avoid computing unused values:
#
stipulate
has = string::is_substring;
herein
hasyytext = has "yytext";
has_reject = has "REJECT";
hasyylineno = has "yylineno";
end;
fun map_int f syms # Map over the intervals of a symbol set
=
sis::foldl_int
(\\ (i, ls) = (f i) ! ls)
[]
syms;
Transition_Interval # Transition interval representation:
=
TI (sis::Interval, Int, Ml_Exp);
fun interval_of (TI (i, t, e)) = i;
fun tag_of (TI (i, t, e)) = t;
fun action_of (TI (i, t, e)) = e;
fun same_tag (TI (_, t1, _), TI (_, t2, _)) = t1 == t2;
fun singleton (TI ((i, j), _, _)) = i == j;
# Generate code for transitions: generate a hard-coded binary
# search on accepting characters
fun mk_trans ([ ], _) => raise exception DIE "(BUG) SMLFunOutput: alphabet not covered";
mk_trans ([t], _) => action_of t;
mk_trans ([t1, t2], _)
=>
if (same_tag (t1, t2) )
#
action_of t1;
else
my (_, t1end) = interval_of t1;
my (t2start, _) = interval_of t2;
if (singleton t1)
#
ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM t1end),
action_of t1,
action_of t2);
elif (singleton t2)
#
ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM t2start),
action_of t2,
action_of t1);
else
#
ML_IF (ML_CMP (ml::LEQ, inp_variable, ML_SYM t1end),
action_of t1,
action_of t2);
fi;
fi;
mk_trans (ts, len)
=>
{ lh = len / 2;
#
fun split ( ls, 0, l1) => (list::reverse l1, ls);
split (l ! ls, count, l1) => split (ls, count - 1, l ! l1);
split _ => raise exception DIE "(BUG) SMLFunOutput: split failed";
end;
my (ts1, ts2)
=
split (ts, lh, []);
my (ts2start, ts2end)
=
interval_of (list::head ts2);
my (ts2', ts2len)
=
ts2start == ts2end
?? (list::tail ts2, len - lh - 1)
:: ( ts2, len - lh );
# we want to take advantage of the special case when
# len = 3 and hd ts2 is a singleton. this case often
# occurs when we have an arrow for a single character.
#
else_clause
=
if (lh == 1 and ts2len == 1)
#
mk_trans ([list::head ts1, list::head ts2'], 2);
else
ML_IF (ML_CMP (ml::LT, inp_variable, ML_SYM ts2start),
mk_trans (ts1, lh),
mk_trans (ts2', ts2len));
fi;
ML_IF (ML_CMP (ml::EQ, inp_variable, ML_SYM ts2start),
action_of (list::head ts2),
else_clause);
};
end;
fun mk_state action_vec (s, k)
=
{ s -> lo::STATE { id, start_state, label, final, next };
#
fun add_match (i, last_match)
=
{ last_match' = has_reject (vector::get (action_vec, i))
?? last_match
:: ML_VAR "yyNO_MATCH";
ML_APP ("yyMATCH",
[ML_VAR "stream",
ML_VAR (act_name i),
last_match']);
};
my (cur_match, next_matches)
=
case final
[] => (NULL, []);
f ! fs => (THE f, fs);
esac;
last_match
=
list::fold_backward add_match (ML_VAR "lastMatch") next_matches;
# Collect all valid transition symbols
labels
=
list::fold_forward sis::union sis::empty (list::map #1 *next);
# pair transition intervals with associated actions/transitions
new_final
=
case cur_match
#
THE j => add_match (j, last_match);
NULL => last_match;
esac;
fun arrows (syms, s)
=
map_int
(\\ i => TI (i, id_of s,
ML_APP (name_of s, [ML_VAR "stream'", new_final])); end )
syms;
tis = list::map arrows *next;
err_act'
=
case cur_match
#
THE j => ML_APP ( act_name j,
[ ML_VAR "stream",
has_reject (vector::get (action_vec, j))
?? last_match
:: ML_VAR "yyNO_MATCH"
]
);
NULL => ML_APP ("yystuck", [last_match]);
esac;
# If start state, check for eof:
#
err_act = if start_state
#
ML_IF (ML_APP("yyInput::eof", [ML_VAR "stream"]),
ML_APP("user_declarations::eof", [ML_VAR "yyarg"]),
err_act');
else
err_act';
fi;
# Error transitions = complement (valid transitions)
#
error = sis::complement labels;
err_tis = map_int (\\ i = TI (i, -1, err_act)) error;
# The arrows represent intervals that partition the entire
# alphabet, with each interval mapped to some transition or
# action. We sort the intervals by their smallest member:
#
fun gt (a, b)
=
(#1 (interval_of a)) > (#1 (interval_of b));
sorted = lms::sort_list gt (list::cat (err_tis ! tis));
# Now we want to find adjacent partitions with the same
# action, and merge their intervals:
#
fun merge [ ] => [ ];
merge [t] => [t];
merge (t1 ! t2 ! ts)
=>
if (same_tag (t1, t2) )
#
t1 -> TI ((i, _), tag, act);
t2 -> TI ((_, j), _, _ );
t = TI ((i, j), tag, act);
merge (t ! ts);
else
t1 ! (merge (t2 ! ts));
fi;
end;
merged = merge sorted;
# Create the transition code
#
trans = mk_trans (merged, list::length merged);
# Create the input code
#
get_inp
=
# trans has at least the error action. if length (merged)
# is 1 then we can avoid getting any input and simply
# take the error transition in all cases. note that
# the "error" transition may actually be a match
#
case merged
#
[_] => err_act;
_ => ML_CASE (ML_APP ("yygetc", [ML_VAR "stream"]),
[(ML_CON_PATTERN ("NULL", []), err_act),
(ML_CON_PATTERN ("THE", [ML_VAR_PATTERN (inp + ", stream'")]),
trans)]);
esac;
ML_FUN (name_of s, ["stream", "lastMatch"], get_inp, k);
};
fun mk_action (i, action, k)
=
{ upd_strm = ML_REF_PUT (ML_VAR "yystrm", ML_VAR "stream");
act = ML_RAW [ml::TOK action];
seq = ML_SEQ [upd_strm, act];
lett = if (hasyytext action )
ML_LET ( "yytext",
ML_APP("yymktext", [ML_VAR "stream"]),
seq
);
else
seq;
fi;
letl = if (hasyylineno action)
#
ML_LET ( "yylineno",
ML_APP ( "REF",
[ML_APP ("yyInput::getlineNo",
[ML_REF_GET (ML_VAR "yystrm")])]),
lett
);
else
lett;
fi;
letr = if (has_reject action)
#
ML_LET ("oldStrm", ML_REF_GET (ML_VAR "yystrm"),
ML_FUN
("REJECT", [],
ML_SEQ
[ML_REF_PUT (ML_VAR "yystrm",
ML_VAR "oldStrm"),
ML_APP("yystuck", [ML_VAR "lastMatch"])],
letl));
else
letl;
fi;
ML_NEW_GROUP (ML_FUN (act_name i, ["stream", "lastMatch"], letr, k));
};
package scc
=
digraph_strongly_connected_components_g (
package {
Key = lo::Dfa_State;
fun compare (lo::STATE { id => id1, ... }, lo::STATE { id => id2, ... } )
=
int::compare (id1, id2);
}
);
fun mk_states (actions, dfa, start_states, k)
=
{ fun follow (lo::STATE { next, ... } )
=
#2 (paired_lists::unzip *next);
scc = scc::topological_order' { roots => start_states, follow };
mk_state' = mk_state actions;
fun mk_grp (scc::SIMPLE state, k) => ML_NEW_GROUP (mk_state' (state, k));
mk_grp (scc::RECURSIVE states, k) => ML_NEW_GROUP (list::fold_backward mk_state' k states);
end;
list::fold_forward mk_grp k scc;
};
fun lexer_hook spec stream
=
{ spec -> lo::SPEC { actions, dfa, start_states, ... };
#
fun match_ss (label, state)
=
( ML_CON_PATTERN (label, []),
ML_APP (name_of state,
[ML_REF_GET (ML_VAR "yystrm"),
ML_VAR "yyNO_MATCH"])
);
inner_expression = ML_CASE (ML_REF_GET (ML_VAR "yyss"),
list::map match_ss start_states);
states_expression = mk_states
(actions, dfa,
#2 (paired_lists::unzip start_states), inner_expression);
lexer_expression = vector::keyed_fold_backward mk_action states_expression actions;
prettyprint_stream = plain_file_prettyprinter::make_plain_file_prettyprinter { output_stream => stream };
ml::prettyprint_ml (prettyprint_stream, lexer_expression);
};
fun start_states_hook spec stream
=
{ spec -> lo::SPEC { start_states, ... };
#
mach_names = #1 (paired_lists::unzip start_states);
fil::write (stream, string::join "
| " mach_names);
};
fun user_decls_hook spec stream
=
{ spec -> lo::SPEC { decls, ... };
#
fil::write (stream, decls);
};
fun header_hook spec stream
=
{ spec -> lo::SPEC { header, ... };
#
fil::write (stream, header);
};
fun args_hook spec stream
=
{ spec -> lo::SPEC { arg, ... };
#
arg' = if (string::length_in_bytes arg == 0)
#
"(yyarg as ())";
else
"(yyarg as " + arg + ") ()";
fi;
fil::write (stream, arg');
};
package tio= file__premicrothread; # file__premicrothread is from
src/lib/std/src/posix/file--premicrothread.pkg template
=
{
file = tio::open_for_read "backends/sml/template-sml-fun.pkg";
#
fun done () = tio::close_input file;
fun read ()
=
case (tio::read_line file)
#
NULL => [];
THE line => line ! read();
esac;
(read()
except
ex = { done(); raise exception ex;}
)
then
done();
};
fun output (spec, fname)
=
expand_file::expand {
src => template,
dst => fname + ".pkg",
hooks => [ ("lexer", lexer_hook spec),
("startstates", start_states_hook spec),
("userdecls", user_decls_hook spec),
("header", header_hook spec),
("args", args_hook spec)
]
};
};
end;
## John Reppy (http://www.cs.uchicago.edu/~jhr)
## Aaron Turon (adrassi@gmail.com)
## All rights reserved.
## COPYRIGHT (c) 2005
## Subsequent changes by Jeff Prothero Copyright (c) 2010-2015,
## released per terms of SMLNJ-COPYRIGHT.