## resolve-operator-precedence.pkg
# Compiled by:
#
src/lib/compiler/front/typer/typer.sublib####################################################
# Motivation
#
# We allow the user to specify the precedence
# and associativity of infix operators.
#
# Rather than try to hack support for that into
# the parser proper, we have the parser pass
# expressions through in an undigested flat-list
# form, from which we can later reconstruct the
# actual appropriate syntax tree representation
# -- here.
####################################################
# Overview
#
# Ths is the schematic problem
# we're trying to solve here:
#
# We have a set of LEAF symbols: a b c ...
# We have a set of BINOP symbols: * + << ...
#
# We recognize two kinds of expressions which may
# be formed from these components:
# f a # Functional application of one leaf to another
# a + b # Application of a BINOP to two adjacent leafs.
#
# Our input is a flat list of LEAF and BINOP
# symbols, where the BINOP symbols are tagged
# with arbitrary precedences, and also marked
# as to whether they are right or left associative.
#
# Our task is to resolve that input list into
# a syntax tree which makes the evaluation
# relationships explicit.
#
# For example,
#
# f a b c + g d * h e
#
# should yield approximately
#
# +
# / \
# / \
# / \
# . *
# / \ / \
# . c / \
# / \ . .
# . b / \ / \
# / \ g d h e
# f a
#
# where we are using '.' to represent functional
# application of the first argument to the second.
#
# (Note in particular that '*' has higher precedence
# than '+', which is reflected in the parse tree shown.)
####################################################
# Algorithm
#
# We use a conventional linear left-to-right scan
# over the input with a stack to hold our
# intermediate partial-syntax-tree results.
#
# Thus, at any given time we have a state consisting
# of the current stack of partial syntax trees together
# with the remaining input tokens, and we are looking
# at the next input token trying to figure out what
# to do next.
#
# Our algorithm consists of three processing rules:
#
# 0) Our default action when we don't know what else
# to do is just to push the input token on our
# stack and go on.
#
# 1) If there's a LEAF F on top of the stack and
# the next input token is a LEAF A, then
# we can combine them into a (F . A) node
# (representing functional application of F to A)
# and push the result back on the stack as a new
# LEAF node.
#
# 2) If the top three items on the stack are:
#
# A * B
#
# (i.e., two LEAFs separated by a BINOP)
# and the next input token is
#
# +
#
# (i.e., a lower-precedence BINOP) then we
# can combine those top three entries into
# a new LEAF node (A*B), push it back on
# the stack, and continue.
#
# And that's basically all we have to do!
#
# There are a few corner cases like issuing
# error messges for syntactically illegal
# situations like a BINOP at the start or
# end of the input list, and we have to
# flush any remaining stuff out of the stack
# when we reach end of input, but basically
# the above three cases get the job done.
####################################################
# Code context
#
# We get called (only) from two places in
#
src/lib/compiler/front/typer/main/type-core-language.pkg# once to resolve regular executable expressions,
# once to resolve pattern-match expressions.
#
# Our 'parse' entry point is applied in two stages:
#
# (1) Called initially with a pair of functions
# 'apply' and 'pair' with which to build up
# the resulting parsetree, by doing
#
# apply (f, a)
#
# to construct a syntax tree node
# encoding the functional application
# of 'f' to 'a', and doing
#
# apply( o, pair (a, b) )
#
# to encode the application of BINOP 'o' to
# arguments 'a' and 'b'.
#
# (Passing in the syntax-tree construction
# functions in this way makes us independent
# of the details of syntax tree construction,
# in particular whether we're building an
# expression or pattern-matching syntax tree.)
#
# At this point we simply latch the given
# 'apply' and 'pair' functions in a closure
# and return the closure.
#
# Essentially, this stage produces a version
# of our 'parse' function specialized to the
# particular type of syntax tree construction
# problem at hand.
#
# (2) The result of the above is then called repeatedly
# to convert particular expressions from flat-list
# form to fully-resolved syntax-tree form.
#
# This call takes as input a tuple of three elements:
#
# 1. The input expression as a flat list.
#
# For our purposes, each entry is list just a blob
# with precedence and associativity information.
# (The full definition is in
#
src/lib/compiler/front/parser/raw-syntax/raw-syntax.pkg# )
#
# 2. The current syntax table.
#
# 3. A sink for error messages.
#
# We then return the resulting syntax tree.
api Resolve_Operator_Precedence {
parse
:
{ apply: (X, X) -> X,
pair: (X, X) -> X
}
->
( List( raw_syntax::Fixity_Item(X) ), # Input list.
symbolmapstack::Symbolmapstack, # To look up binop precedences.
(raw_syntax::Source_Code_Region -> error_message::Plaint_Sink) # Error message sink.
)
->
X;
};
stipulate
package err= error_message;
package f = fixity; # fixity is from
src/lib/compiler/front/basics/map/fixity.pkgherein
package resolve_operator_precedence
: (weak) Resolve_Operator_Precedence # Resolve_Operator_Precedence is from
src/lib/compiler/front/typer/main/resolve-operator-precedence.pkg {
# Define our evaluation stack.
#
# For a LEAF we just store its syntax tree.
#
# For a BINOP we store the syntax tree plus
# a precedence plus the binop symbol itself,
# for error message purposes.
#
# Both of the above end with a pointer
# to the next node in the stack:
#
Precedence_Stack X
= BINOP (symbol::Symbol, Int, X, Precedence_Stack(X))
| LEAF (X, Precedence_Stack(X))
| BOTTOM_OF_STACK;
##################################################################
# 'parse': Our main external entry point (stage 1).
#
# All we do at this point is to latch
# 'apply' and 'pair' in a closure,
# which we then return:
#
fun parse { apply, pair }
=
{
##################################################################
# 'ensure_leaf': We call this function when we're in
# a situation, such as processing of
# the first input token, in which a
# binary infix operator is not
# syntactically valid.
#
# If the input token -is- in fact
# infix, we issue an error message.
#
# Either way, we push the input token
# on the stack as a LEAF node, and
# return the new stack.
#
fun ensure_leaf (stack, (expression, f::NONFIX, _, err))
=>
LEAF (expression, stack);
ensure_leaf (stack, (expression, f::INFIX _, THE symbol, err))
=>
{ err
err::ERROR
( "expression or pattern begins with infix identifier \""
+ symbol::name symbol
+ "\""
)
err::null_error_body;
LEAF (expression, stack);
};
ensure_leaf _
=>
err::impossible "precedence: ensureLEAF";
end;
##################################################################
# 'parse_item': Process one input item.
#
# First argument is our current result-expression stack.
# Second argument is current input item to process.
# We return the updated result-expression stack.
#
# Basically, we just compare the LEAF/BINOP type
# (and in the latter case, precedence) of the input
# token with those of the top one or two entries
# on the expression stack and then do the obvious.
# (See algorithm discussion at top of file.)
######################
# [...LEAF]-LEAF case: Two consecutive non-infix expressions
# represent function application of
# first to second:
#
fun parse_item (
LEAF (expression1, rest_of_stack),
(expression2, f::NONFIX, _, err)
)
=>
LEAF (apply (expression1, expression2), rest_of_stack);
######################
# [...BINOP]-* case: We don't allow two binary
# operators in a row, so when
# we have an BINOP at top of
# stack, issue an error message
# if the input token is also BINOP.
#
# Either way, push the input token
# on the stack as a LEAF node
# and return the new stack:
#
parse_item (stack as BINOP _, token)
=>
ensure_leaf (stack, token);
######################
# [...LEAF, BINOP, LEAF]-BINOP case: In this situation, the two BINOPs
# are competing for the right
# to eat the LEAF between them.
#
# If the first BINOP has higher precedence,
# it can go ahead and eat the two LEAFs
# adjacent to it.
#
# If the second BINOP has higher precedence,
# then we need to push it on the stack and
# wait for its second argument to arrive.
#
parse_item (
stack as LEAF (expression1, BINOP (_, precedence2, expression2, LEAF (expression3, rest_of_stack))
),
( expression4,
f as f::INFIX (precedence4left, precedence4right), # Latter two are identical except for associativity-encoding low bit.
THE symbol,
err
)
)
=>
if (precedence4left > precedence2)
# Second BINOP has higher precedence, so
# push it on the stack to await its second
# argument and return the new stack:
#
BINOP (symbol, precedence4right, expression4, stack);
else
if (precedence4left == precedence2)
err
err::WARNING
"mixed left- and right-associative \
\operators of same precedence"
err::null_error_body;
fi;
# First BINOP operator has higher precedence,
# so go ahead and pop the top three stack
# entries (the BINOP plus its two LEAF args),
# create a syntax tree node applying the BINOP
# to its two LEAF arguments, push the newly
# created expression back on the stack as a new
# LEAF node, and call ourselves recursively,
# since we still haven't eaten the input token:
#
parse_item (
LEAF (
apply (
expression2,
pair (expression3, expression1)
),
rest_of_stack
),
( expression4,
f,
THE symbol,
err
)
);
fi;
######################
# [...LEAF]-BINOP case: We can't do anything with the BINOP
# until we arrive at its second argument,
# so we just push it on the stack.
#
parse_item (
stack as LEAF _,
( expression,
f::INFIX (precedence_left, precedence_right),
THE symbol,
_
)
)
=>
BINOP (symbol, precedence_right, expression, stack);
parse_item _
=>
err::impossible "Precedence::parse";
end;
##################################################################
# 'finish': Clean up the stack
#
fun finish (
LEAF (
e1,
BINOP (_, _, e2, LEAF (e3, r))
),
err
)
=>
finish (
LEAF (
apply (e2, pair (e3, e1)),
r
),
err
);
finish (LEAF (e1, BOTTOM_OF_STACK), _)
=>
e1;
finish (BINOP (symbol, _, e1, LEAF (e2, p)), err)
=>
{ err
err::ERROR
( "expression or pattern ends with infix identifier \""
+ symbol::name symbol
+ "\""
)
err::null_error_body;
finish (
LEAF (
apply (e2, e1),
p
),
err
);
};
finish (BOTTOM_OF_STACK, err)
=>
err::impossible "Corelang::finish BOTTOM_OF_STACK";
finish _
=>
err::impossible "Corelang::finish";
end;
# The following anonymous function is our return
# value from stage-one application (where all
# we do is note our 'apply' and 'pair' functions).
#
\\ ( all_input_items as first_item ! nonfirst_items, # The input expression as a flat list.
symbolmapstack, # The current symbol table.
error # The sink for our error messages.
)
=>
{ #########################################
# Extract the fixity information we need
# from one input item by way of lookup
# in the symbol table.
#
# Our return value is essentially the same
# input item in a more convenient representation:
#
fun unpack_input_item { item, source_code_region, fixity }
=
( item,
case fixity
NULL => f::NONFIX;
THE symbol => find_in_symbolmapstack::find_fixity_by_symbol (symbolmapstack, symbol);
esac,
fixity,
error source_code_region
);
#########################################
# For error message purposes, figure out
# our end-of-input location in the source code.
#
# Our input is the complete flat-form input-item list:
#
fun endloc [ { source_code_region => (_, x), item, fixity } ]
=>
error (x, x);
endloc (_ ! a) => endloc a;
endloc _ => err::impossible "precedence: endloc";
end;
#########################################
# 'loop' is our central loop-over-the-list-of-input-items function.
# The first item is our result-expression stack.
# The second argument is our list of input items remaining to be processed.
# Our return result is the fully resolved syntax tree.
#
fun loop (stack, input_item ! remaining_items) # Process one input item.
=>
loop (parse_item (stack, unpack_input_item input_item), remaining_items);
loop (stack, NIL) # No-more-input case -- done.
=>
finish (stack, endloc all_input_items);
end;
#########################################
# 'make_initial_stack' is our start-of-the-main-loop
# initialization function for stage-two
# processing.
#
# Our 'token' input argument is the fixity
# info for the first token in our flat-form
# input-syntax list.
#
# Our result is the initial stack:
#
fun make_initial_stack token
=
ensure_leaf (BOTTOM_OF_STACK, token);
# Process all input and return final result:
#
loop (make_initial_stack (unpack_input_item first_item), nonfirst_items);
};
_ => err::impossible "precedence: parse";
end;
};
}; # package resolve_operator_precedence
end; # stipulate