## make-larl-g.pkg
# Mythryl-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi
# Compiled by:
#
src/app/yacc/src/mythryl-yacc.lib### "If I had staid for other people to
### make my tools & things for me,
### I had never made anything of it..."
###
### -- Isaac Newton
generic package make_lalr_g (
package internal_grammar: Internal_Grammar; # Internal_Grammar is from
src/app/yacc/src/internal-grammar.api package core: Core; # Core is from
src/app/yacc/src/core.api package graph: Lr_Graph; # Lr_Graph is from
src/app/yacc/src/lr-graph.api package look: Look; # Look is from
src/app/yacc/src/look.api sharing graph::core == core;
sharing graph::internal_grammar == core::internal_grammar
== look::internal_grammar
== internal_grammar;
)
: (weak) La_Lr_Graph # La_Lr_Graph is from
src/app/yacc/src/la-lr-graph.api{
include package rw_vector;
include package list;
infix my 9 sub;
include package internal_grammar::grammar;
include package internal_grammar;
include package core;
include package graph;
include package look;
package graph = graph;
package core = core;
package grammar= internal_grammar::grammar; # internal_grammar is from
src/app/yacc/src/grammar.pkg package internal_grammar = internal_grammar;
Tmpcore = TMPCORE (List( (Item, Ref( List( Terminal )))), Int);
Lcore = LCORE (List( (Item, List( Terminal )) ), Int);
fun pr_lcore (a as (symbol_to_string, nonterm_to_string, term_to_string, print))
=
{ print_item = print_item (symbol_to_string, nonterm_to_string, print);
print_lookahead = pr_look (term_to_string, print);
\\ (LCORE (items, state))
=>
{ print "\n";
print "state ";
print (int::to_string state);
print " :\n\n";
list::apply
( \\ (item, lookahead)
=>
{ print "{ ";
print_item item;
print ", ";
print_lookahead lookahead;
print "}\n";
}; end
)
items;
}; end ;
};
exception LALR Int;
package item_list
=
list_ord_set_g (
package {
Element = (Item, Ref( List( Terminal ) ));
fun eq ((a, _), (b, _)) = eq_item (a, b);
fun gt ((a, _), (b, _)) = gt_item (a, b);
}
);
package nonterm_set
=
list_ord_set_g (
package {
Element = Nonterminal;
gt = gt_nonterm;
eq = eq_nonterm;
}
);
# NTL: nonterms with lookahead
package ntl
=
redblack_ord_set_g (
package {
Element = (Nonterminal, List( Terminal ));
fun gt ((i, _), (j, _)) = gt_nonterm (i, j);
fun eq ((i, _), (j, _)) = eq_nonterm (i, j);
}
);
debug = FALSE;
fun add_lookahead { graph, nullable, first, eop,
rules, produces, nonterms, eps_prods,
print, term_to_string, nonterm_to_string }
=
{ eop = look::make_set eop;
fun symbol_to_string ( TERMINAL t) => term_to_string t;
symbol_to_string (NONTERMINAL t) => nonterm_to_string t;
end;
print = if debug print;
else \\ _ = ();
fi;
pr_look = if debug pr_look (term_to_string, print);
else \\ _ = ();
fi;
pr_nonterm = print o nonterm_to_string;
pr_rule = if debug pr_rule (symbol_to_string, nonterm_to_string, print);
else \\ _ = ();
fi;
print_int = print o (int::to_string: Int -> String);
print_item = print_item (symbol_to_string, nonterm_to_string, print);
# look_pos: position in the rhs of a rule at which we should start placing
# lookahead ref cells, i.e. the minimum place at which A -> x .B y, where
# B is a nonterminal and y =*=> epsilon, or A -> x. is TRUE. Positions are
# given by the number of symbols before the place. The place before the first
# symbol is 0, etc.
stipulate
positions = make_rw_vector (length rules, 0);
# rule_pos: calculate place in the rhs of a rule at which we should start
# placing lookahead ref cells
#
fun rule_pos (RULE { rhs, ... } )
=
case (reverse rhs)
#
NIL => 0;
( TERMINAL t) ! r => length rhs;
(NONTERMINAL n ! r)
=>
{ # f assumes that everything after n in the
# rule has proven to be nullable so far.
# Remember that the rhs has been reversed,
# implying that this is TRUE initially
fun f (b, (r as (TERMINAL _ ! _)))
=> # A -> .z t B y, where y is nullable
length r;
f (c, (NONTERMINAL b ! r))
=> # A -> .z B C y
if (nullable c ) f (b, r);
else length r + 1;fi;
f (_, [])
=> # A -> .B y, where y is nullable
0;
end;
f (n, r);
};
esac;
fun check_rule (rule as RULE { num, ... } )
=
{ pos = rule_pos rule;
#
print "look_pos: ";
pr_rule rule;
print " = ";
print_int pos;
print "\n";
rw_vector::set (positions, num, rule_pos rule);
};
my _ = apply check_rule rules;
herein
fun look_pos (RULE { num, ... })
=
positions[ num ];
end;
# rest_is_null: TRUE for items of the form A -> x .B y, where y is nullable
fun rest_is_null (ITEM { rule, dot, rhs_after=>NONTERMINAL _ ! _} )
=>
dot >= (look_pos rule);
rest_is_null _
=>
FALSE;
end;
# map core to a new core including only items of the form A -> x. or
# A -> x. B y, where y =*=> epsilon. It also adds epsilon productions to the
# core. Each item is given a ref cell to hold the lookahead nonterminals for
# it.
stipulate
fun f (item as ITEM { rhs_after=>NIL, ... }, r)
=>
(item, REF NIL) ! r;
f (item, r)
=>
if (rest_is_null item)
(item, REF NIL) ! r;
else
r;
fi;
end;
herein
fun map_core (c as CORE (items, state))
=
{ eps_items
=
map (\\ rule=>(ITEM { rule, dot=>0, rhs_after=>NIL },
REF (NIL: List( Terminal ))); end
) (eps_prods c);
TMPCORE (item_list::union (list::fold_backward f [] items, eps_items), state);
};
end;
new_nodes
=
map map_core (nodes graph);
exception FIND;
# findRef: state * item -> lookahead ref cell for item
stipulate
states = rw_vector::from_list new_nodes;
dummy = REF NIL;
herein
fun find_ref (state, item)
=
{ my TMPCORE (l, _) = states[ state ];
#
case (item_list::find((item, dummy), l))
#
THE (_, look_ref)
=>
look_ref;
NULL =>
{ print "find failed: state ";
print_int state;
print "\nitem =\n";
print_item item;
print "\nactual items =\n";
apply (\\ (i, _) = { print_item i;
print "\n";
}
)
l;
raise exception FIND;
};
esac;
};
end;
# findRuleRefs: state -> rule -> lookahead refs for rule.
stipulate
shift = shift graph;
herein
fun find_rule_refs state (rule as RULE { rhs=>NIL, ... } )
=> # handle epsilon productions
[find_ref (state, ITEM { rule, dot=>0, rhs_after=>NIL } )];
find_rule_refs state (rule as RULE { rhs=>symbol ! rest, ... } )
=>
scan (shift (state, symbol), rest, pos - 1)
where
pos = int::max (look_pos rule, 1);
#
fun scan' (state, NIL, pos, result)
=>
find_ref (state, ITEM { rule, dot=>pos, rhs_after=>NIL } ) ! result;
scan'(state, rhs as symbol ! rest, pos, result)
=>
scan' (shift (state, symbol), rest, pos+1,
find_ref (state, ITEM { rule, dot=>pos, rhs_after=>rhs } ) ! result);
end;
# find first item of the form A -> x .B y, where y =*=> epsilon and
# x is not epsilon, or A -> x. use scan' to pick up all refs after this
# point
fun scan (state, NIL, _)
=>
[ find_ref (state, ITEM { rule, dot=>pos, rhs_after=>NIL } )];
scan (state, rhs, 0)
=>
scan'(state, rhs, pos, NIL);
scan (state, symbol ! rest, place)
=>
scan (shift (state, symbol), rest, place - 1);
end;
end;
end;
end;
# function to compute for some nonterminal n the set of nonterminals A added
# through the closure of nonterminal n such that n =c*=> .A x, where x is
# nullable
fun nonterms_w_null nt
=
dfs (nt, nonterm_set::empty)
where
#
fun collect_nonterms n
=
list::fold_backward
( \\ (rule as RULE { rhs as NONTERMINAL n ! _, ... }, r)
=>
case (rest_is_null (ITEM { dot=>0, rhs_after=>rhs, rule } ))
#
TRUE => n ! r;
FALSE => r;
esac;
(_, r) => r;
end
)
[]
(produces n);
fun dfs (a as (n, r))
=
if (nonterm_set::exists a)
#
r;
else
list::fold_backward
dfs
(nonterm_set::set (n, r))
(collect_nonterms n);
fi;
end;
stipulate
data = make_rw_vector (nonterms, nonterm_set::empty);
#
fun f n
=
if (n != nonterms)
#
rw_vector::set (data, n, nonterms_w_null (NONTERM n));
f (n+1);
fi;
my _ = f 0;
herein
fun nonterms_w_null (NONTERM nt)
=
data[ nt ];
end;
# look_info: for some nonterminal n the set of nonterms A added
# through the closure of the nonterminal such that n =c+=> .Ax and the
# lookahead accumlated for each nonterm A
fun look_info nt
=
dfs ((nt, NIL), ntl::empty)
where
#
fun collect_nonterms n
=
list::fold_backward
( \\ (RULE { rhs=>NONTERMINAL n ! t, ... }, r)
=>
case (ntl::find ((n, NIL), r))
#
THE (key, data) => ntl::set ((n, look::union (data, first t)), r);
NULL => ntl::set ((n, first t), r);
esac;
(_, r) => r;
end
)
ntl::empty
(produces n);
fun dfs (a as ((key1, data1), r))
=
case (ntl::find a)
#
THE (_, data2) => ntl::set((key1, look::union (data1, data2)), r);
NULL => ntl::fold dfs (collect_nonterms key1) (ntl::set a);
esac;
end;
look_info
=
if (not debug)
look_info;
else
\\ nt =>
info
where
print "look_info of ";
pr_nonterm nt;
print "=\n";
info = look_info nt;
ntl::apply
( \\ (nt, lookahead)
=>
{ pr_nonterm nt;
print ": ";
pr_look lookahead;
print "\n\n";
};
end
)
info;
end; end ;
fi;
# prop_look: propagate lookaheads for nonterms added in the closure of a
# nonterm. Lookaheads must be propagated from each nonterminal m to
# all nonterminals { n
| m =c+=> nx, where x=*=>epsilon }
fun prop_look ntl
=
ntl::fold upd_nonterm ntl ntl
where
fun upd_lookhd new_look (nt, r)
=
case (ntl::find ((nt, new_look), r))
#
THE (_, old_look)
=>
ntl::set((nt, look::union (new_look, old_look)), r);
NULL => raise exception (LALR 241);
esac;
fun upd_nonterm ((nt, get), r)
=
nonterm_set::fold
(upd_lookhd get)
(nonterms_w_null nt)
r;
end;
prop_look
=
if (not debug)
#
prop_look;
else
\\ ntl
=>
info
where
print "prop_look =\n";
info = prop_look ntl;
ntl::apply
( \\ (nt, lookahead)
=>
{ pr_nonterm nt;
print ": ";
pr_look lookahead;
print "\n\n";
};
end
)
info;
end; end ;
fi;
# Now put the information from these functions together.
# Create a function which takes a nonterminal n
# and returns a list of triplets of
# (a nonterm added through closure,
# the lookahead for the nonterm,
# whether the nonterm should include the lookahead for the nonterminal
# whose closure is being taken (i.e. first (y) for an item j of the
# form A -> x .n y and lookahead (j) if y =*=> epsilon)
#
stipulate
data = make_rw_vector (nonterms, NIL: List( (Nonterminal, List( Terminal ), Bool)));
#
fun do_nonterm i
=
result
where
#
nonterms_followed_by_null
=
nonterms_w_null i;
nonterms_added_through_closure
=
ntl::make_list (prop_look (look_info i));
result
=
map
( \\ (nt, l)
=
(nt, l, nonterm_set::exists (nt, nonterms_followed_by_null))
)
nonterms_added_through_closure;
if debug
#
print "closure_nonterms = ";
pr_nonterm i;
print "\n";
apply
( \\ (nt, get, nullable)
=
{ pr_nonterm nt;
print ":";
pr_look get;
case nullable
#
FALSE => print "(FALSE)\n";
TRUE => print "(TRUE)\n";
esac;
}
)
result;
print "\n";
fi;
end;
fun f i
=
if (i != nonterms)
#
rw_vector::set (data, i, do_nonterm (NONTERM i));
f (i+1);
fi;
my _ = f 0;
herein
fun closure_nonterms (NONTERM i)
=
data[ i ];
end;
# add_nonterm_lookahead: Add lookahead to all completion items for rules added
# when the closure of a given nonterm in some state is taken. It returns
# a list of lookahead refs to which the given nonterm's lookahead should
# be propagated. For each rule, it must trace the shift/gotos in the LR (0)
# graph to find all items of the form A-> x .B y where y =*=> epsilon or
# A -> x.
fun add_nonterm_lookahead (nt, state)
=
list::fold_backward f [] (closure_nonterms nt)
where
#
fun f ((nt, lookahead, nullable), r)
=
if nullable refs @ r;
else r;
fi
where
refs = map (find_rule_refs state) (produces nt);
refs = list::cat refs;
apply (\\ r = r := (look::union (*r, lookahead)))
refs;
end;
end;
# scan_core: Scan a core for all items of the form A -> x .B y.
#
# Applies add_nonterm_lookahead to each such B, and then merges first (y)
# into the list of refs returned by add_nonterm_lookahead.
#
# It returns a list of ref * ref List for all the items where y =*=> epsilon
fun scan_core (CORE (l, state))
=
f (l, NIL)
where
fun f ((item as ITEM { rhs_after=> NONTERMINAL b ! y, dot, rule } ) ! t, r)
=>
case (add_nonterm_lookahead (b, state))
#
NIL => r;
l=> { first_y = first y;
#
new_r = if (dot >= (look_pos rule))
(find_ref (state, item), l) ! r;
else r;
fi;
apply (\\ r = r := look::union(*r, first_y))
l;
f (t, new_r);
};
esac;
f (_ ! t, r) => f (t, r);
f (NIL, r) => r;
end;
end;
# Add end-of-parse symbols to set of items
# consisting of all items immediately
# derived from the start symbol
fun add_eop (c as CORE (l, state), eop)
=
apply f l
where
#
fun f (item as ITEM { rule, dot, ... } )
=
{ refs = find_rule_refs state rule;
# First take care of kernel items.
#
# Add the end-of-parse symbols to
# the lookahead sets for these items.
#
# Epsilon productions of the start symbol
# do not need to be handled specially
# because they will be in the kernel also.
apply (\\ r = r := look::union(*r, eop)) refs;
# Now take care of closure items.
# These are all nonterminals 'c' which
# have a derivation 's' =+=> .'c' x, where x is nullable
if (dot >= (look_pos rule))
#
case item
#
ITEM { rhs_after=>NONTERMINAL b ! _, ... }
=>
case (add_nonterm_lookahead (b, state))
#
NIL => ();
l => apply (\\ r = r := look::union(*r, eop)) l;
esac;
_ => ();
esac;
fi;
};
end;
fun iterate l
=
loop FALSE
where
fun f lookahead (NIL, done)
=>
done;
f lookahead (h ! t, done)
=>
{ old = *h;
#
h := look::union (old, lookahead);
if (length *h != length old) f lookahead (t, FALSE);
else f lookahead (t, done);
fi;
};
end;
fun g ((from, to) ! rest, done)
=>
{ new_done = f *from (to, done);
g (rest, new_done);
};
g (NIL, done) => done;
end;
fun loop TRUE => ();
loop FALSE => loop (g (l, TRUE));
end;
end;
lookahead = list::cat (map scan_core (nodes graph));
# used to scan the item list of a TMPCORE and remove the items not
# being reduced
fun create_lcore_list ((item as ITEM { rhs_after=>NIL, ... }, REF l), r)
=>
(item, l) ! r;
create_lcore_list (_, r)
=>
r;
end;
add_eop (graph::core graph 0, eop);
iterate lookahead;
map ( \\ (TMPCORE (l, state))
=
LCORE (list::fold_backward create_lcore_list [] l, state)
)
new_nodes;
};
};