# Mythryl-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi
#
# parser.pkg: This is a parser driver for LR tables with an error-recovery
# routine added to it. The routine used is described in detail in this
# article:
#
# 'A Practical Method for LR and LL Syntactic Error Diagnosis and
# Recovery', by M. Burke and G. Fisher, ACM Transactions on
# Programming Langauges and Systems, Vol. 9, No. 2, April 1987, pages 164-197.
#
# This program is an implementation of the partial, deferred method discussed
# in the article. The algorithm and data structures used in the program
# are described below.
#
# This program assumes that all semantic actions are delayed. A semantic
# action should produce a function from Void -> Value instead of producing the
# normal Value. The parser returns the semantic value on the top of the
# stack when accept is encountered. The user can deconstruct this value
# and apply the Void -> Value function in it to get the answer.
#
# It also assumes that the lexer is a lazy stream.
#
# Data Structures:
# ----------------
#
# * The parser:
#
# The state stack has the type
#
# List (state, (semantic value, line #, line #))
#
# The parser keeps a queue of (state stack, lexer pair). A lexer pair
# consists of a (terminal, value) pair and a lexer. This allows the
# parser to reconstruct the states for terminals to the left of a
# syntax error, and attempt to make error corrections there.
#
# The queue consists of a pair of lists (x, y). New additions to
# the queue are cons'ed onto y. The first element of x is the top
# of the queue. If x is NIL, then y is reversed and used
# in place of x.
#
# Algorithm:
# ----------
#
# * The steady-state parser:
#
# This parser keeps the length of the queue of state stacks at
# a steady state by always removing an element from the front when
# another element is placed on the end.
#
# It has these arguments:
#
# stack: current stack
# queue: value of the queue
# lex_pair ((terminal, value), lex stream)
#
# When SHIFT is encountered, the state to shift to and the value are
# are pushed onto the state stack. The state stack and lex_pair are
# placed on the queue. The front element of the queue is removed.
#
# When REDUCTION is encountered, the rule is applied to the current
# stack to yield a triple (nonterm, value, new stack). A new
# stack is formed by adding (goto (top state of stack, nonterm), value)
# to the stack.
#
# When ACCEPT is encountered, the top value from the stack and the
# lexer are returned.
#
# When an ERROR is encountered, fix_error is called. FixError
# takes the arguments to the parser, fixes the error if possible and
# returns a new set of arguments.
#
# * The distance-parser:
#
# This parser includes an additional argument distance. It pushes
# elements on the queue until it has parsed distance tokens, or an
# ACCEPT or ERROR occurs. It returns a stack, lexer, the number of
# tokens left unparsed, a queue, and an action option.
# Compiled by:
#
src/lib/std/standard.libapi Yacc_Fifo {
#
Queue(X);
empty: Queue(X);
exception EMPTY;
get: Queue(X) -> (X, Queue(X));
put: (X, Queue(X)) -> Queue(X);
};
# drt (12/15/89) -- the generic should be used in development work, but
# it is wasted space in the release version.
#
# generic parser_gen_g (package lr_table: LR_TABLE
# package stream: STREAM) : LR_PARSER =
#
stipulate
package fil = file__premicrothread; # file__premicrothread is from
src/lib/std/src/posix/file--premicrothread.pkgherein
package lr_parser
: Lr_Parser # Lr_Parser is from
src/app/yacc/lib/base.api {
#
package lr_table # Exported to client packages.
= lr_table; # lr_table is from
src/app/yacc/lib/lrtable.pkg package stream # Exported to client packages.
= stream; # stream is from
src/app/yacc/lib/stream.pkg fun eq_t (lr_table::TERM i, lr_table::TERM i')
=
i == i';
package token # Exported to client packages.
: (weak) Token # Token is from
src/app/yacc/lib/base.api {
package lr_table
= lr_table; # lr_table is from
src/app/yacc/lib/lrtable.pkg Token (X,Y)
=
TOKEN (lr_table::Terminal, ((X, Y, Y)));
same_token
=
\\ (TOKEN (t, _), TOKEN (t', _)) = eq_t (t, t');
};
include package lr_table;
include package token;
debug1_flag = FALSE;
debug2_flag = FALSE;
exception PARSE_ERROR;
exception PARSE_IMPOSSIBLE Int;
package fifo: Yacc_Fifo { # Yacc_Fifo is from
src/app/yacc/lib/parser2.pkg Queue(X)
=
(List(X), List(X));
empty = (NIL, NIL);
exception EMPTY;
fun get (a ! x, y) => (a, (x, y));
get (NIL, NIL) => raise exception EMPTY;
get (NIL, y) => get (reverse y, NIL);
end;
fun put (a, (x, y)) = (x, a ! y);
};
Element (X,Y)
=
(State, ((X, Y, Y)));
Stack (X,Y)
=
List( Element (X, Y) );
Lexv (X,Y)
=
Token (X, Y);
Lexpair (X,Y)
=
(Lexv (X, Y), stream::Stream( Lexv (X, Y)));
Distance_Parse (X,Y)
=
( Lexpair (X,Y),
Stack (X,Y),
fifo::Queue ((Stack(X,Y), Lexpair(X,Y)) ),
Int
)
->
( Lexpair (X,Y),
Stack (X,Y),
fifo::Queue ((Stack (X,Y), Lexpair(X,Y)) ),
Int,
Null_Or( Action )
);
Error_Recovery_Info (X,Y)
=
{ is_keyword: Terminal -> Bool,
preferred_change: List( (List( Terminal ), List( Terminal )) ),
error: (String, Y, Y) -> Void,
errtermvalue: Terminal -> X,
terms: List( Terminal ),
show_terminal: Terminal -> String,
no_shift: Terminal -> Bool
};
stipulate
print = \\ s = fil::write (fil::stdout, s);
println = \\ s = { print s; print "\n"; };
#
show_state = \\ (STATE s) = "STATE " + (int::to_string s);
herein
fun print_stack (stack: Stack( X, Y ), n: Int)
=
case stack
#
(state, _) ! rest
=>
{ print("\t" + int::to_string n + ": ");
println (show_state state);
print_stack (rest, n+1);
};
NIL => ();
esac;
fun pr_action
#
show_terminal
#
( stack as (state, _) ! _,
next as (TOKEN (term, _), _),
action
)
=>
{ println "Parse: state stack:";
#
print_stack (stack, 0);
#
print(" state="
+ show_state state
+ " next="
+ show_terminal term
+ " action="
);
case action
#
SHIFT state => println ("SHIFT " + (show_state state));
REDUCE i => println ("REDUCE " + (int::to_string i));
ERROR => println "ERROR";
ACCEPT => println "ACCEPT";
esac;
};
pr_action _ (_, _, action)
=>
();
end;
end;
# steadystate_parse: parser which maintains the
# queue of (State, Lexvalues) in a steady-state.
#
# It takes a table, show_terminal function, saction
# function, and fix_error function.
#
# It parses until an ACCEPT is encountered or an
# exception is raised. When an error is encountered,
# fix_error is called with the arguments of
# parseStep (lexv, stack, and queue).
#
# It returns the lexv, and a new stack and queue
# adjusted so that the lexv can be parsed
#
steadystate_parse
=
\\ (table, show_terminal, saction, fix_error, arg)
=
parse_step
where
#
pr_action = pr_action show_terminal;
action = lr_table::action table;
goto = lr_table::goto table;
fun parse_step (args as
(lex_pair as (TOKEN (terminal, value as (_, left_pos, _)),
lexer
),
stack as (state, _) ! _,
queue))
=>
{ next_action
=
action (state, terminal);
if debug1_flag
pr_action (stack, lex_pair, next_action);
fi;
case next_action
SHIFT s
=>
{ new_stack
=
(s, value) ! stack;
new_lex_pair
=
stream::get lexer;
my (_, new_queue)
=
fifo::get (fifo::put((new_stack, new_lex_pair),
queue));
parse_step (new_lex_pair, (s, value) ! stack, new_queue);
};
REDUCE i
=>
case (saction (i, left_pos, stack, arg))
(nonterm, value, stack as (state, _) ! _)
=>
parse_step (lex_pair, (goto (state, nonterm), value) ! stack, queue);
_ =>
raise exception (PARSE_IMPOSSIBLE 197);
esac;
ERROR
=>
parse_step (fix_error args);
ACCEPT
=>
case stack
#
(_, (topvalue, _, _)) ! _
=>
{ my (token, rest_lexer)
=
lex_pair;
(topvalue, stream::cons (token, rest_lexer));
};
_ =>
raise exception (PARSE_IMPOSSIBLE 202);
esac;
esac;
};
parse_step _
=>
raise exception (PARSE_IMPOSSIBLE 204);
end;
end;
# distance_parse: parse until n tokens are shifted or
# accept or error are encountered.
#
# Takes a table, show_terminal function, and semantic action function.
#
# Returns a parser which takes a lex_pair
# (lex result * lexer), a state stack, a queue, and a distance
# (must be > 0) to parse.
#
# The parser returns a new lex-value, a stack
# with the nth token shifted on top, a queue, a distance, and action
# option.
#
distance_parse
=
\\ (table, show_terminal, saction, arg)
=
(parse_step: Distance_Parse( X, Y ))
where
pr_action = pr_action show_terminal;
action = lr_table::action table;
goto = lr_table::goto table;
fun parse_step (lex_pair, stack, queue, 0)
=>
(lex_pair, stack, queue, 0, NULL);
parse_step (lex_pair as (TOKEN (terminal, value as (_, left_pos, _)),
lexer
),
stack as (state, _) ! _,
queue, distance)
=>
{ next_action
=
action (state, terminal);
if debug1_flag
pr_action (stack, lex_pair, next_action);
fi;
case next_action
#
SHIFT s
=>
{ new_stack = (s, value) ! stack;
new_lex_pair = stream::get lexer;
parse_step
( new_lex_pair,
(s, value) ! stack,
fifo::put((new_stack, new_lex_pair), queue),
distance - 1
);
};
REDUCE i
=>
case (saction (i, left_pos, stack, arg))
(nonterm, value, stack as (state, _) ! _)
=>
parse_step (lex_pair, (goto (state, nonterm), value) ! stack,
queue, distance);
_ =>
raise exception (PARSE_IMPOSSIBLE 240);
esac;
ERROR => (lex_pair, stack, queue, distance, THE next_action);
ACCEPT => (lex_pair, stack, queue, distance, THE next_action);
esac;
};
parse_step _
=>
raise exception (PARSE_IMPOSSIBLE 242);
end;
end;
# make_fix_error: function to create fix_error function which adjusts parser state
# so that parse may continue in the presence of an error
#
fun make_fix_error
(
{ is_keyword, terms, errtermvalue, preferred_change, no_shift, show_terminal, error, ... } : Error_Recovery_Info(X, Y),
distance_parse: Distance_Parse(X, Y),
min_advance,
max_advance
)
#
( lexv as (TOKEN (term, value as (_, left_pos, _)), _),
stack,
queue
)
=
{ if debug2_flag error("syntax error found at " + (show_terminal term), left_pos, left_pos); fi;
#
fun tok_at (t, p)
=
TOKEN (t, (errtermvalue t, p, p));
min_delta = 3;
# Pull all the (state, lexv)
# elements from the queue:
#
state_list
=
f queue
where
fun f q
=
{ (fifo::get q)
->
(element, new_queue);
element ! (f new_queue);
}
except
fifo::EMPTY = NIL;
end;
# Now number elements of state_list,
# giving distance from error token
my (_, num_state_list)
=
list::fold_backward
(\\ (a, (num, r))
=
(num+1, (a, num) ! r)
)
(0, [])
state_list;
# Represent the set of potential changes as a linked list.
#
# Values of sumtype Change hold information about a potential change.
#
# op = op to be applied
# pos = the # of the element in stateList that would be altered.
# distance = the number of tokens beyond the error token which the
# change allows us to parse.
# new = new terminal * value pair at that point
# orig = original terminal * value pair at the point being changed.
Change (X,Y)
=
CHANGE
{ pos: Int,
distance: Int,
left_pos: Y,
right_pos: Y,
new: List( Lexv(X,Y) ),
orig: List( Lexv(X,Y) )
};
show_terms
=
cat o map (\\ TOKEN (t, _) = " " + show_terminal t);
print_change
=
\\ c
=
{ c -> CHANGE { distance, new, orig, pos, ... };
#
print ("{ distance= " + (int::to_string distance));
print (", orig ="); print (show_terms orig);
print (", new ="); print (show_terms new);
print (", pos= " + (int::to_string pos));
print "}\n";
};
print_change_list
=
apply print_change;
# parse: Given a lex_pair, a stack, and the distance from the error token
# return the distance past the error token that we are able to parse.
fun parse (lex_pair, stack, queue_pos: Int)
=
case (distance_parse (lex_pair, stack, fifo::empty, queue_pos+max_advance+1))
#
(_, _, _, distance, THE ACCEPT)
=>
if (max_advance-distance - 1 >= 0) max_advance;
else max_advance - distance - 1;
fi;
(_, _, _, distance, _)
=>
max_advance - distance - 1;
esac;
# cat_list: concatenate results of scanning list
fun cat_list l f
=
list::fold_backward
(\\ (a, r) = f a @ r)
[]
l;
fun keywords_delta new
=
if (list::exists
(\\ (TOKEN (t, _)) = is_keyword t)
new
)
min_delta;
else 0;
fi;
fun try_change { lex, stack, pos, left_pos, right_pos, orig, new }
=
{ lex' = list::fold_backward
(\\ (t', p) = (t', stream::cons p))
lex
new;
distance = parse (lex', stack, pos + (length new) - (length orig));
if (distance >= min_advance + keywords_delta new)
#
[ CHANGE { pos, left_pos, right_pos, distance, orig, new } ];
else
[];
fi;
};
# try_delete: Try to delete n terminals.
# Return single-element [success] or NIL.
# Do not delete unshiftable terminals.
fun try_delete n ((stack, lex_pair as (TOKEN (term, (_, l, r)), _)), q_pos)
=
del (n, [], l, r, lex_pair)
where
fun del (0, accum, left, right, lex_pair)
=>
try_change { lex=>lex_pair, stack,
pos=>q_pos, left_pos=>left, right_pos=>right,
orig=>reverse accum, new=> []
};
del (n, accum, left, right, (tok as TOKEN (term, (_, _, r)), lexer))
=>
if (no_shift term) [];
else del (n - 1, tok ! accum, left, r, stream::get lexer);
fi;
end;
end;
# try_insert: Try to insert tokens before the current terminal.
# Return a list of the successes.
fun try_insert((stack, lex_pair as (TOKEN(_, (_, l, _)), _)), queue_pos)
=
cat_list
terms
(\\ t = try_change
{ lex => lex_pair,
stack,
#
pos => queue_pos,
#
orig => [],
new => [tok_at (t, l)],
#
left_pos => l,
right_pos => l
}
);
# try_subst: Try to substitute tokens for the current terminal.
# Return a list of the successes
fun try_subst ((stack, lex_pair as (orig as TOKEN (term, (_, l, r)), lexer)), queue_pos)
=
if (no_shift term)
#
[];
else
cat_list
terms
(\\ t
=
try_change { lex=>stream::get lexer, stack,
pos=>queue_pos,
left_pos=>l, right_pos=>r, orig => [orig],
new=> [tok_at (t, r)] }
);
fi;
# do_delete (toks, lex_pair) tries to delete tokens "toks" from "lex_pair".
# If it succeeds, returns THE (toks', l, r, lp), where
# toks' is the actual tokens (with positions and values) deleted,
# (l, r) are the (leftmost, rightmost) position of toks',
# lp is what remains of the stream after deletion
#
fun do_delete (NIL, lp as (TOKEN(_, (_, l, _)), _))
=>
THE (NIL, l, l, lp);
do_delete([t], (tok as TOKEN (t', (_, l, r)), lp'))
=>
if (eq_t (t, t')) THE([tok], l, r, stream::get lp');
else NULL;
fi;
do_delete (t ! rest, (tok as TOKEN (t', (_, l, r)), lp'))
=>
if (eq_t (t, t'))
#
case (do_delete (rest, stream::get lp'))
#
THE (deleted, l', r', lp'')
=>
THE (tok ! deleted, l, r', lp'');
NULL => NULL;
esac;
else
NULL;
fi;
end;
fun try_preferred((stack, lex_pair), queue_pos)
=
cat_list
preferred_change
(\\ (delete, insert)
=
if (list::exists no_shift delete)
#
[]; # should give warning at # parser-generation time
else
case (do_delete (delete, lex_pair))
#
THE (deleted, l, r, lp)
=>
try_change
{
stack,
lex => lp,
#
pos => queue_pos,
#
left_pos => l,
right_pos => r,
#
orig => deleted,
new => map (\\ t= (tok_at (t, r))) insert
};
NULL => [];
esac;
fi
);
changes
=
cat_list num_state_list try_preferred @
cat_list num_state_list try_insert @
cat_list num_state_list try_subst @
cat_list num_state_list (try_delete 1) @
cat_list num_state_list (try_delete 2) @
cat_list num_state_list (try_delete 3) ;
find_max_dist
=
\\ l
=
fold_backward
(\\ (CHANGE { distance, ... }, high) = int::max (distance, high))
0
l;
# max_dist: max distance past error taken that we could parse
max_dist
=
find_max_dist changes;
# Remove changes which did not parse maxDist tokens past the error token
changes
=
cat_list
changes
(\\ (c as CHANGE { distance, ... } )
=
if (distance == max_dist) [c];
else [];
fi
);
case changes
#
(l as change ! _)
=>
(lex_pair, stack, queue)
where
fun print_msg (CHANGE { new, orig, left_pos, right_pos, ... } )
=
{ s = case (orig, new)
#
(_ ! _, []) => "deleting " + (show_terms orig);
#
([], _ ! _) => "inserting " + (show_terms new);
#
_ => "replacing " + (show_terms orig)
+ " with "
+ (show_terms new);
esac;
error ("syntax error: " + s, left_pos, right_pos);
};
if (length l > 1 and debug2_flag)
#
print "multiple fixes possible; could fix it by:\n";
apply print_msg l;
print "chosen correction:\n";
fi;
print_msg change;
# find_nth: Find nth queue entry from the error entry.
# Returns the Nth queue entry and the portion of
# the queue from the beginning to the nth - 1 entry.
# The error entry is at the end of the queue.
#
# Examples:
# queue = a b c d e
# findNth 0 = (e, a b c d)
# findNth 1 = (d, a b c)
find_nth
=
\\ n = f (reverse state_list, n)
where
fun f (h ! t, 0) => (h, reverse t);
#
f (h ! t, n) => f (t, n - 1);
f (NIL, _)
=>
{ exception FIND_NTH;
raise exception FIND_NTH;
};
end;
end;
change -> CHANGE { pos, orig, new, ... };
(find_nth pos) -> (last, queue_front);
last -> (stack, lex_pair);
lp1 = fold_forward (\\ (_, (_, r)) => stream::get r; end ) lex_pair orig;
lp2 = fold_backward (\\ (t, r)=>(t, stream::cons r); end ) lp1 new;
rest_queue
=
fifo::put((stack, lp2),
fold_forward fifo::put fifo::empty queue_front);
(distance_parse (lp2, stack, rest_queue, pos))
->
(lex_pair, stack, queue, _, _);
end;
NIL =>
{ error ("syntax error found at " + (show_terminal term), left_pos, left_pos);
#
raise exception PARSE_ERROR;
};
esac;
};
parse
=
\\ { arg,
table,
lexer,
saction,
void,
lookahead,
error_recovery as { show_terminal, ... } : Error_Recovery_Info (X, Y)
}
=
loop (distance_parse (lex_pair, start_stack, start_queue, distance))
where
distance = 15; # Defer distance tokens
min_advance = 1; # Must parse at least 1 token past error
max_advance = int::max (lookahead, 0); # max distance for parse check
lex_pair = stream::get lexer;
lex_pair -> (TOKEN (_, (_, left_pos, _)), _);
start_stack = [ (initial_state table, (void, left_pos, left_pos)) ];
start_queue = fifo::put((start_stack, lex_pair), fifo::empty);
distance_parse
=
distance_parse (table, show_terminal, saction, arg);
fix_error = make_fix_error (error_recovery, distance_parse, min_advance, max_advance);
steadystate_parse
=
steadystate_parse (table, show_terminal, saction, fix_error, arg);
fun loop (lex_pair, stack, queue, _, THE ACCEPT)
=>
steadystate_parse (lex_pair, stack, queue);
loop (lex_pair, stack, queue, 0, _)
=>
steadystate_parse (lex_pair, stack, queue);
loop (lex_pair, stack, queue, distance, THE ERROR)
=>
{ (fix_error (lex_pair, stack, queue))
->
(lex_pair, stack, queue);
#
loop (distance_parse (lex_pair, stack, queue, distance));
};
loop _ => { exception PARSE_INTERNAL;
#
raise exception PARSE_INTERNAL;
};
end;
end;
};
end;